From 8a82e2a592a3637e65081cb772f0b7e4d23d7032 Mon Sep 17 00:00:00 2001 From: Pedro Amaral Date: Wed, 13 Sep 2023 16:12:19 -0300 Subject: [PATCH 1/6] Several changes This commit incorporates several developments into spreg. Main changes: - Refactoring of the output. The way the results from the models are printed to the users has been completely refactored. The new function is simpler and generic. It is based on pandas to create an 'output' attribute to the regression object that contains the most important information on the estimates from the model. - Addition of slx_lags to all cross-section models. An attribute slx_lags was added to all cross-section models to allow for the direct estimation of SLX models. - Creation of wrapper functions for the error models. Both error_sp and error_sp_regimes now have wrapper functions that allow for easy estimation of error models based on the kp98, hom or het estimators. The wrapper functions also allow specifications with extra endogenous variables or spatial lag of the dependent variable. - Several minor updates of fixes were also developed in this commit. Despite the major changes, this commit does not break any previous code. This update was developed in a private branch by Luc Anselin and Pedro Amaral. --- spreg/diagnostics_sp.py | 44 +- spreg/error_sp.py | 562 ++++++++++++++++-------- spreg/error_sp_het.py | 106 ++++- spreg/error_sp_het_regimes.py | 255 ++++++++--- spreg/error_sp_hom.py | 115 ++++- spreg/error_sp_hom_regimes.py | 253 ++++++++--- spreg/error_sp_regimes.py | 779 +++++++++++++++++++++++++++------- spreg/ml_error.py | 56 ++- spreg/ml_error_regimes.py | 102 +++-- spreg/ml_lag.py | 84 +++- spreg/ml_lag_regimes.py | 81 ++-- spreg/ols.py | 72 +++- spreg/ols_regimes.py | 313 +++++++++----- spreg/output.py | 476 +++++++++++++++++++++ spreg/regimes.py | 42 +- spreg/twosls.py | 60 ++- spreg/twosls_regimes.py | 183 ++++++-- spreg/twosls_sp.py | 63 ++- spreg/twosls_sp_regimes.py | 120 ++++-- spreg/user_output.py | 25 +- spreg/utils.py | 66 ++- 21 files changed, 3063 insertions(+), 794 deletions(-) mode change 100644 => 100755 spreg/ml_lag.py create mode 100644 spreg/output.py diff --git a/spreg/diagnostics_sp.py b/spreg/diagnostics_sp.py index a3aaeaac..bf0dbfe7 100644 --- a/spreg/diagnostics_sp.py +++ b/spreg/diagnostics_sp.py @@ -18,7 +18,6 @@ class LMtests: - """ Lagrange Multiplier tests. Implemented as presented in :cite:`Anselin1996a` @@ -142,7 +141,6 @@ def __init__(self, ols, w, tests=["all"]): class MoranRes: - """ Moran's I for spatial autocorrelation in residuals from OLS regression @@ -396,7 +394,6 @@ def __init__(self, iv, w, case="nosp"): class spDcache: - """ Helper class to compute reusable pieces in the spatial diagnostics module ... @@ -669,7 +666,7 @@ def get_vI(ols, w, ei, spDcache): B = spDcache.AB[1] trB = np.sum(B.diagonal()) * 4.0 vi = (w.n ** 2 / (w.s0 ** 2 * (w.n - ols.k) * (w.n - ols.k + 2.0))) * ( - w.s1 + 2.0 * trA2 - trB - ((2.0 * (spDcache.trA ** 2)) / (w.n - ols.k)) + w.s1 + 2.0 * trA2 - trB - ((2.0 * (spDcache.trA ** 2)) / (w.n - ols.k)) ) return vi @@ -716,9 +713,6 @@ def akTest(iv, w, spDcache): p : float P-value of the test - ToDo: - * Code in as Nancy - * Compare both """ mi = get_mI(iv, w, spDcache) # Phi2 @@ -731,6 +725,42 @@ def akTest(iv, w, spDcache): return (mi, ak[0][0], pval[0][0]) +def comfac_test(lambd, beta, gamma, vm): + """ + Computes the Spatial Common Factor Hypothesis test as shown in Anselin (1988, p. 
226-229) + + Parameters + ---------- + + lambd : float + Spatial autoregressive coefficient (as in lambd*Wy) + beta : array + Coefficients of the exogenous (not spatially lagged) variables, without the constant (as in X*beta) + gamma : array + coefficients of the spatially lagged exogenous variables (as in WX*gamma) + vm : array + Variance-covariance matrix of the coefficients + Obs. Needs to match the order of theta' = [beta', gamma', lambda] + + Returns + ------- + W : float + Wald statistic + pvalue : float + P value for Wald statistic calculated as a Chi sq. distribution + with k-1 degrees of freedom + + """ + g = lambd * beta + gamma + G = np.vstack((lambd * np.eye(beta.shape[0]), np.eye(beta.shape[0]), beta.T)) + + GVGi = la.inv(np.dot(G.T, np.dot(vm, G))) + W = np.dot(g.T, np.dot(GVGi, g))[0][0] + df = G.shape[1] + pvalue = chisqprob(W, df) + return W, pvalue + + def _test(): import doctest diff --git a/spreg/error_sp.py b/spreg/error_sp.py index 5fe47e00..765fddc8 100644 --- a/spreg/error_sp.py +++ b/spreg/error_sp.py @@ -9,23 +9,16 @@ import numpy as np from numpy import linalg as la from . import ols as OLS -from libpysal.weights.spatial_lag import lag_spatial -from .utils import power_expansion, set_endog, iter_msg, sp_att -from .utils import ( - get_A1_hom, - get_A2_hom, - get_A1_het, - optim_moments, - get_spFilter, - get_lags, - _moments2eqs, -) -from .utils import spdot, RegressionPropsY, set_warn +from .utils import set_endog, sp_att, optim_moments, get_spFilter, get_lags, spdot, RegressionPropsY, set_warn from . import twosls as TSLS from . import user_output as USER -from . import summary_output as SUMMARY +import pandas as pd +from .output import output, _spat_pseudo_r2 +from .error_sp_het import GM_Error_Het, GM_Endog_Error_Het, GM_Combo_Het +from .error_sp_hom import GM_Error_Hom, GM_Endog_Error_Hom, GM_Combo_Hom -__all__ = ["GM_Error", "GM_Endog_Error", "GM_Combo"] + +__all__ = ["GMM_Error", "GM_Error", "GM_Endog_Error", "GM_Combo"] class BaseGM_Error(RegressionPropsY): @@ -138,6 +131,9 @@ class GM_Error(BaseGM_Error): independent (exogenous) variable, excluding the constant w : pysal W object Spatial weights object (always needed) + slx_lags : integer + Number of spatial lags of X to include in the model specification. + If slx_lags>0, the specification becomes of the SDEM type. 
vm : boolean If True, include variance-covariance matrix in summary results @@ -149,9 +145,13 @@ class GM_Error(BaseGM_Error): Name of weights matrix for use in output name_ds : string Name of dataset for use in output + latex : boolean + Specifies if summary is to be printed in latex format Attributes ---------- + output : dataframe + regression results pandas dataframe summary : string Summary of regression results and diagnostics (note: use in conjunction with the print command) @@ -287,7 +287,7 @@ class GM_Error(BaseGM_Error): """ def __init__( - self, y, x, w, vm=False, name_y=None, name_x=None, name_w=None, name_ds=None + self, y, x, w, slx_lags=0, vm=False, name_y=None, name_x=None, name_w=None, name_ds=None, latex=False ): n = USER.check_arrays(y, x) @@ -295,14 +295,24 @@ def __init__( USER.check_weights(w, y, w_required=True) x_constant, name_x, warn = USER.check_constant(x, name_x) set_warn(self, warn) + + self.title = "GM SPATIALLY WEIGHTED LEAST SQUARES" + if slx_lags >0: + lag_x = get_lags(w, x_constant[:, 1:], slx_lags) + x_constant = np.hstack((x_constant, lag_x)) + name_x += USER.set_name_spatial_lags(name_x, slx_lags) + self.title += " WITH SLX (SDEM)" + BaseGM_Error.__init__(self, y=y, x=x_constant, w=w.sparse) - self.title = "SPATIALLY WEIGHTED LEAST SQUARES" self.name_ds = USER.set_name_ds(name_ds) self.name_y = USER.set_name_y(name_y) self.name_x = USER.set_name_x(name_x, x_constant) self.name_x.append("lambda") self.name_w = USER.set_name_w(name_w, w) - SUMMARY.GM_Error(reg=self, w=w, vm=vm) + self.output = pd.DataFrame(self.name_x, columns=['var_names']) + self.output['var_type'] = ['x'] * (len(self.name_x) - 1) + ['lambda'] + self.output['regime'], self.output['equation'] = (0, 0) + output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) class BaseGM_Endog_Error(RegressionPropsY): @@ -379,7 +389,7 @@ class BaseGM_Endog_Error(RegressionPropsY): >>> w.transform='r' >>> model = spreg.error_sp.BaseGM_Endog_Error(y, x, yend, q, w=w.sparse) >>> np.around(model.betas, decimals=4) - array([[82.573 ], + array([[82.5723], [ 0.581 ], [-1.4481], [ 0.3499]]) @@ -438,6 +448,9 @@ class GM_Endog_Error(BaseGM_Endog_Error): this should not contain any variables from x) w : pysal W object Spatial weights object (always needed) + slx_lags : integer + Number of spatial lags of X to include in the model specification. + If slx_lags>0, the specification becomes of the SDEM type. 
vm : boolean If True, include variance-covariance matrix in summary results @@ -453,9 +466,13 @@ class GM_Endog_Error(BaseGM_Endog_Error): Name of weights matrix for use in output name_ds : string Name of dataset for use in output + latex : boolean + Specifies if summary is to be printed in latex format Attributes ---------- + output : dataframe + regression results pandas dataframe summary : string Summary of regression results and diagnostics (note: use in conjunction with the print command) @@ -603,12 +620,12 @@ class GM_Endog_Error(BaseGM_Endog_Error): >>> print(model.name_z) ['CONSTANT', 'inc', 'hoval', 'lambda'] >>> np.around(model.betas, decimals=4) - array([[82.573 ], + array([[82.5723], [ 0.581 ], [-1.4481], [ 0.3499]]) >>> np.around(model.std_err, decimals=4) - array([16.1381, 1.3545, 0.7862]) + array([16.1382, 1.3545, 0.7862]) """ @@ -619,6 +636,7 @@ def __init__( yend, q, w, + slx_lags=0, vm=False, name_y=None, name_x=None, @@ -626,6 +644,7 @@ def __init__( name_q=None, name_w=None, name_ds=None, + latex=False, ): n = USER.check_arrays(y, x, yend, q) @@ -633,8 +652,13 @@ def __init__( USER.check_weights(w, y, w_required=True) x_constant, name_x, warn = USER.check_constant(x, name_x) set_warn(self, warn) + self.title = "GM SPATIALLY WEIGHTED TWO STAGE LEAST SQUARES" + if slx_lags >0: + lag_x = get_lags(w, x_constant[:, 1:], slx_lags) + x_constant = np.hstack((x_constant, lag_x)) + name_x += USER.set_name_spatial_lags(name_x, slx_lags) + self.title += " WITH SLX (SDEM)" BaseGM_Endog_Error.__init__(self, y=y, x=x_constant, w=w.sparse, yend=yend, q=q) - self.title = "SPATIALLY WEIGHTED TWO STAGE LEAST SQUARES" self.name_ds = USER.set_name_ds(name_ds) self.name_y = USER.set_name_y(name_y) self.name_x = USER.set_name_x(name_x, x_constant) @@ -644,137 +668,14 @@ def __init__( self.name_q = USER.set_name_q(name_q, q) self.name_h = USER.set_name_h(self.name_x, self.name_q) self.name_w = USER.set_name_w(name_w, w) - SUMMARY.GM_Endog_Error(reg=self, w=w, vm=vm) - - -class BaseGM_Combo(BaseGM_Endog_Error): - - """ - GMM method for a spatial lag and error model, with endogenous variables - (note: no consistency checks, diagnostics or constant added); based on - Kelejian and Prucha (1998, 1999) :cite:`Kelejian1998` :cite:`Kelejian1999`. - - Parameters - ---------- - y : array - nx1 array for dependent variable - x : array - Two dimensional array with n rows and one column for each - independent (exogenous) variable, excluding the constant - yend : array - Two dimensional array with n rows and one column for each - endogenous variable - q : array - Two dimensional array with n rows and one column for each - external exogenous variable to use as instruments (note: - this should not contain any variables from x) - w : Sparse matrix - Spatial weights sparse matrix - w_lags : integer - Orders of W to include as instruments for the spatially - lagged dependent variable. For example, w_lags=1, then - instruments are WX; if w_lags=2, then WX, WWX; and so on. - lag_q : boolean - If True, then include spatial lags of the additional - instruments (q). 
- - Attributes - ---------- - betas : array - kx1 array of estimated coefficients - u : array - nx1 array of residuals - e_filtered : array - nx1 array of spatially filtered residuals - predy : array - nx1 array of predicted y values - n : integer - Number of observations - k : integer - Number of variables for which coefficients are estimated - (including the constant) - y : array - nx1 array for dependent variable - x : array - Two dimensional array with n rows and one column for each - independent (exogenous) variable, including the constant - yend : array - Two dimensional array with n rows and one column for each - endogenous variable - z : array - nxk array of variables (combination of x and yend) - mean_y : float - Mean of dependent variable - std_y : float - Standard deviation of dependent variable - vm : array - Variance covariance matrix (kxk) - sig2 : float - Sigma squared used in computations + self.output = pd.DataFrame(self.name_z, + columns=['var_names']) + self.output['var_type'] = ['x'] * len(self.name_x) + ['yend'] * len(self.name_yend) + ['lambda'] + self.output['regime'], self.output['equation'] = (0, 0) + output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) - Examples - -------- - - >>> import numpy as np - >>> import libpysal - >>> import spreg - >>> db = libpysal.io.open(libpysal.examples.get_path('columbus.dbf'),'r') - >>> y = np.array(db.by_col("CRIME")) - >>> y = np.reshape(y, (49,1)) - >>> X = [] - >>> X.append(db.by_col("INC")) - >>> X = np.array(X).T - >>> w = libpysal.weights.Rook.from_shapefile(libpysal.examples.get_path("columbus.shp")) - >>> w.transform = 'r' - >>> w_lags = 1 - >>> yd2, q2 = spreg.set_endog(y, X, w, None, None, w_lags, True) - >>> X = np.hstack((np.ones(y.shape),X)) - - Example only with spatial lag - - >>> reg = spreg.error_sp.BaseGM_Combo(y, X, yend=yd2, q=q2, w=w.sparse) - - Print the betas - - >>> print(np.around(np.hstack((reg.betas[:-1],np.sqrt(reg.vm.diagonal()).reshape(3,1))),3)) - [[39.059 11.86 ] - [-1.404 0.391] - [ 0.467 0.2 ]] - - And lambda - - >>> print('Lamda: ', np.around(reg.betas[-1], 3)) - Lamda: [-0.048] - - Example with both spatial lag and other endogenous variables - - >>> X = [] - >>> X.append(db.by_col("INC")) - >>> X = np.array(X).T - >>> yd = [] - >>> yd.append(db.by_col("HOVAL")) - >>> yd = np.array(yd).T - >>> q = [] - >>> q.append(db.by_col("DISCBD")) - >>> q = np.array(q).T - >>> yd2, q2 = spreg.set_endog(y, X, w, yd, q, w_lags, True) - >>> X = np.hstack((np.ones(y.shape),X)) - >>> reg = spreg.error_sp.BaseGM_Combo(y, X, yd2, q2, w=w.sparse) - >>> betas = np.array([['CONSTANT'],['INC'],['HOVAL'],['W_CRIME']]) - >>> print(np.hstack((betas, np.around(np.hstack((reg.betas[:-1], np.sqrt(reg.vm.diagonal()).reshape(4,1))),4)))) - [['CONSTANT' '50.0944' '14.3593'] - ['INC' '-0.2552' '0.5667'] - ['HOVAL' '-0.6885' '0.3029'] - ['W_CRIME' '0.4375' '0.2314']] - - """ - - def __init__(self, y, x, yend=None, q=None, w=None, w_lags=1, lag_q=True): - - BaseGM_Endog_Error.__init__(self, y=y, x=x, w=w, yend=yend, q=q) - -class GM_Combo(BaseGM_Combo): +class GM_Combo(BaseGM_Endog_Error): """ GMM method for a spatial lag and error model with endogenous variables, @@ -801,9 +702,13 @@ class GM_Combo(BaseGM_Combo): Orders of W to include as instruments for the spatially lagged dependent variable. For example, w_lags=1, then instruments are WX; if w_lags=2, then WX, WWX; and so on. + slx_lags : integer + Number of spatial lags of X to include in the model specification. 
+ If slx_lags>0, the specification becomes of the General Nesting + Spatial Model (GNSM) type. lag_q : boolean If True, then include spatial lags of the additional - instruments (q). + instruments (q) vm : boolean If True, include variance-covariance matrix in summary results @@ -819,9 +724,13 @@ class GM_Combo(BaseGM_Combo): Name of weights matrix for use in output name_ds : string Name of dataset for use in output + latex : boolean + Specifies if summary is to be printed in latex format Attributes ---------- + output : dataframe + regression results pandas dataframe summary : string Summary of regression results and diagnostics (note: use in conjunction with the print command) @@ -1015,6 +924,7 @@ def __init__( q=None, w=None, w_lags=1, + slx_lags=0, lag_q=True, vm=False, name_y=None, @@ -1023,6 +933,7 @@ def __init__( name_q=None, name_w=None, name_ds=None, + latex=False, ): n = USER.check_arrays(y, x, yend, q) @@ -1030,23 +941,23 @@ def __init__( USER.check_weights(w, y, w_required=True) x_constant, name_x, warn = USER.check_constant(x, name_x) set_warn(self, warn) - yend2, q2 = set_endog(y, x_constant[:, 1:], w, yend, q, w_lags, lag_q) - BaseGM_Combo.__init__( - self, - y=y, - x=x_constant, - w=w.sparse, - yend=yend2, - q=q2, - w_lags=w_lags, - lag_q=lag_q, - ) + if slx_lags == 0: + yend2, q2 = set_endog(y, x_constant[:, 1:], w, yend, q, w_lags, lag_q) + else: + yend2, q2, wx = set_endog(y, x_constant[:, 1:], w, yend, q, w_lags, lag_q, slx_lags) + x_constant = np.hstack((x_constant, wx)) + + BaseGM_Endog_Error.__init__(self, y=y, x=x_constant, w=w.sparse, yend=yend2, q=q2) + self.rho = self.betas[-2] self.predy_e, self.e_pred, warn = sp_att( w, self.y, self.predy, yend2[:, -1].reshape(self.n, 1), self.rho ) set_warn(self, warn) - self.title = "SPATIALLY WEIGHTED TWO STAGE LEAST SQUARES" + self.title = "SPATIALLY WEIGHTED 2SLS - GM-COMBO MODEL" + if slx_lags > 0: + name_x += USER.set_name_spatial_lags(name_x, slx_lags) + self.title += " WITH SLX (GNSM)" self.name_ds = USER.set_name_ds(name_ds) self.name_y = USER.set_name_y(name_y) self.name_x = USER.set_name_x(name_x, x_constant) @@ -1058,7 +969,294 @@ def __init__( self.name_q.extend(USER.set_name_q_sp(self.name_x, w_lags, self.name_q, lag_q)) self.name_h = USER.set_name_h(self.name_x, self.name_q) self.name_w = USER.set_name_w(name_w, w) - SUMMARY.GM_Combo(reg=self, w=w, vm=vm) + self.output = pd.DataFrame(self.name_z, + columns=['var_names']) + self.output['var_type'] = ['x'] * len(self.name_x) + ['yend'] * (len(self.name_yend) - 1) + ['rho', 'lambda'] + self.output['regime'], self.output['equation'] = (0, 0) + self.other_top = _spat_pseudo_r2(self) + output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) + + +class GMM_Error(GM_Error, GM_Endog_Error, GM_Combo, GM_Error_Het, GM_Endog_Error_Het, + GM_Combo_Het, GM_Error_Hom, GM_Endog_Error_Hom, GM_Combo_Hom): + + """ + Wrapper function to call any of the GMM methods for a spatial error model available in spreg + + Parameters + ---------- + y : array + nx1 array for dependent variable + x : array + Two dimensional array with n rows and one column for each + independent (exogenous) variable, excluding the constant + w : pysal W object + Spatial weights object (always needed) + yend : array + Two dimensional array with n rows and one column for each + endogenous variable (if any) + q : array + Two dimensional array with n rows and one column for each + external exogenous variable to use as instruments (if any) + (note: this should not contain any variables from x) + 
estimator : string + Choice of estimator to be used. Options are: 'het', which + is robust to heteroskedasticity, 'hom', which assumes + homoskedasticity, and 'kp98', which does not provide + inference on the spatial parameter for the error term. + add_wy : boolean + If True, then a spatial lag of the dependent variable is included. + slx_lags : integer + Number of spatial lags of X to include in the model specification. + If slx_lags>0, the specification becomes of the SDEM or GNSM type. + vm : boolean + If True, include variance-covariance matrix in summary + results + name_y : string + Name of dependent variable for use in output + name_x : list of strings + Names of independent variables for use in output + name_w : string + Name of weights matrix for use in output + name_yend : list of strings + Names of endogenous variables for use in output + name_q : list of strings + Names of instruments for use in output + name_ds : string + Name of dataset for use in output + latex : boolean + Specifies if summary is to be printed in latex format + **kwargs : keywords + Additional arguments to pass on to the estimators. + See the specific functions for details on what can be used. + + Attributes + ---------- + output : dataframe + regression results pandas dataframe + summary : string + Summary of regression results and diagnostics (note: use in + conjunction with the print command) + betas : array + kx1 array of estimated coefficients + u : array + nx1 array of residuals + e_filtered : array + nx1 array of spatially filtered residuals + predy : array + nx1 array of predicted y values + n : integer + Number of observations + k : integer + Number of variables for which coefficients are estimated + (including the constant) + y : array + nx1 array for dependent variable + x : array + Two dimensional array with n rows and one column for each + independent (exogenous) variable, including the constant + mean_y : float + Mean of dependent variable + std_y : float + Standard deviation of dependent variable + pr2 : float + Pseudo R squared (squared correlation between y and ypred) + vm : array + Variance covariance matrix (kxk) + sig2 : float + Sigma squared used in computations + std_err : array + 1xk array of standard errors of the betas + z_stat : list of tuples + z statistic; each tuple contains the pair (statistic, + p-value), where each is a float + name_y : string + Name of dependent variable for use in output + name_x : list of strings + Names of independent variables for use in output + name_w : string + Name of weights matrix for use in output + name_ds : string + Name of dataset for use in output + title : string + Name of the regression method used + name_yend : list of strings (optional) + Names of endogenous variables for use in output + name_z : list of strings (optional) + Names of exogenous and endogenous variables for use in + output + name_q : list of strings (optional) + Names of external instruments + name_h : list of strings (optional) + Names of all instruments used in ouput + + + Examples + -------- + + We first need to import the needed modules, namely numpy to convert the + data we read into arrays that ``spreg`` understands and ``libpysal`` to + handle the weights and file management. + + >>> import numpy as np + >>> import libpysal + >>> from libpysal.examples import load_example + + Open data on NCOVR US County Homicides (3085 areas) using libpysal.io.open(). + This is the DBF associated with the NAT shapefile. 
Note that + libpysal.io.open() also reads data in CSV format; since the actual class + requires data to be passed in as numpy arrays, the user can read their + data in using any method. + + >>> nat = load_example('Natregimes') + >>> db = libpysal.io.open(nat.get_path("natregimes.dbf"),'r') + + Extract the HR90 column (homicide rates in 1990) from the DBF file and make it the + dependent variable for the regression. Note that PySAL requires this to be + an numpy array of shape (n, 1) as opposed to the also common shape of (n, ) + that other packages accept. + + >>> y_var = 'HR90' + >>> y = np.array([db.by_col(y_var)]).reshape(3085,1) + + Extract UE90 (unemployment rate) and PS90 (population structure) vectors from + the DBF to be used as independent variables in the regression. Other variables + can be inserted by adding their names to x_var, such as x_var = ['Var1','Var2','...] + Note that PySAL requires this to be an nxj numpy array, where j is the + number of independent variables (not including a constant). By default + this model adds a vector of ones to the independent variables passed in. + + >>> x_var = ['PS90','UE90'] + >>> x = np.array([db.by_col(name) for name in x_var]).T + + Since we want to run a spatial error model, we need to specify + the spatial weights matrix that includes the spatial configuration of the + observations. To do that, we can open an already existing gal file or + create a new one. In this case, we will create one from ``NAT.shp``. + + >>> w = libpysal.weights.Rook.from_shapefile(nat.get_path("natregimes.shp")) + + Unless there is a good reason not to do it, the weights have to be + row-standardized so every row of the matrix sums to one. Among other + things, this allows to interpret the spatial lag of a variable as the + average value of the neighboring observations. In PySAL, this can be + easily performed in the following way: + + >>> w.transform = 'r' + + The GMM_Error class can run error models and SARAR models, that is a spatial lag+error model. + In this example we will run a simple version of the latter, where we have the + spatial effects as well as exogenous variables. Since it is a spatial + model, we have to pass in the weights matrix. If we want to + have the names of the variables printed in the output summary, we will + have to pass them in as well, although this is optional. + + >>> from spreg import GMM_Error + >>> model = GMM_Error(y, x, w=w, add_wy=True, name_y=y_var, name_x=x_var, name_ds='NAT') + + Once we have run the model, we can explore a little bit the output. The + regression object we have created has many attributes so take your time to + discover them. + + >>> print(model.output) + var_names coefficients std_err zt_stat prob + 0 CONSTANT 2.176007 1.115807 1.950165 0.051156 + 1 PS90 1.108054 0.207964 5.328096 0.0 + 2 UE90 0.664362 0.061294 10.83893 0.0 + 3 W_HR90 -0.066539 0.154395 -0.430964 0.666494 + 4 lambda 0.765087 0.04268 17.926245 0.0 + + This class also allows the user to run a spatial lag+error model with the + extra feature of including non-spatial endogenous regressors. This means + that, in addition to the spatial lag and error, we consider some of the + variables on the right-hand side of the equation as endogenous and we + instrument for this. In this case we consider RD90 (resource deprivation) + as an endogenous regressor. We use FP89 (families below poverty) + for this and hence put it in the instruments parameter, 'q'. 
+ + >>> yd_var = ['RD90'] + >>> yd = np.array([db.by_col(name) for name in yd_var]).T + >>> q_var = ['FP89'] + >>> q = np.array([db.by_col(name) for name in q_var]).T + + And then we can run and explore the model analogously to the previous combo: + + >>> model = GMM_Error(y, x, yend=yd, q=q, w=w, add_wy=True, name_y=y_var, name_x=x_var, name_yend=yd_var, name_q=q_var, name_ds='NAT') + >>> print(model.output) + var_names coefficients std_err zt_stat prob + 0 CONSTANT 5.44035 0.560476 9.706652 0.0 + 1 PS90 1.427042 0.1821 7.836572 0.0 + 2 UE90 -0.075224 0.050031 -1.503544 0.132699 + 3 RD90 3.316266 0.261269 12.692924 0.0 + 4 W_HR90 0.200314 0.057433 3.487777 0.000487 + 5 lambda 0.136933 0.070098 1.953457 0.050765 + + The class also allows for estimating a GNS model by adding spatial lags of the exogenous variables, using the argument slx_lags: + + >>> model = GMM_Error(y, x, w=w, add_wy=True, slx_lags=1, name_y=y_var, name_x=x_var, name_ds='NAT') + >>> print(model.output) + var_names coefficients std_err zt_stat prob + 0 CONSTANT -0.554756 0.551765 -1.00542 0.314695 + 1 PS90 1.09369 0.225895 4.841583 0.000001 + 2 UE90 0.697393 0.082744 8.428291 0.0 + 3 W_PS90 -1.533378 0.396651 -3.865811 0.000111 + 4 W_UE90 -1.107944 0.33523 -3.305028 0.00095 + 5 W_HR90 1.529277 0.389354 3.927728 0.000086 + 6 lambda -0.917928 0.079569 -11.53625 0.0 + + + """ + + def __init__( + self, y, x, w, yend=None, q=None, estimator='het', add_wy=False, slx_lags=0, vm=False, name_y=None, name_x=None, name_w=None, name_yend=None, + name_q=None, name_ds=None, latex=False, **kwargs): + + if estimator == 'het': + if yend is None and not add_wy: + GM_Error_Het.__init__(self, y=y, x=x, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, + name_w=name_w, name_ds=name_ds, latex=latex, **kwargs) + elif yend is not None and not add_wy: + GM_Endog_Error_Het.__init__(self, y=y, x=x, yend=yend, q=q, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, + name_yend=name_yend, name_q=name_q, name_w=name_w, name_ds=name_ds, latex=latex, **kwargs) + elif add_wy: + GM_Combo_Het.__init__(self, y=y, x=x, yend=yend, q=q, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, + name_yend=name_yend, name_q=name_q, name_w=name_w, name_ds=name_ds, latex=latex, **kwargs) + else: + set_warn(self, 'Combination of arguments passed to GMM_Error not allowed. Using default arguments instead.') + GM_Error_Het.__init__(self, y=y, x=x, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, + name_w=name_w, name_ds=name_ds, latex=latex) + elif estimator == 'hom': + if yend is None and not add_wy: + GM_Error_Hom.__init__(self, y=y, x=x, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, + name_w=name_w, name_ds=name_ds, latex=latex, **kwargs) + elif yend is not None and not add_wy: + GM_Endog_Error_Hom.__init__(self, y=y, x=x, yend=yend, q=q, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, + name_yend=name_yend, name_q=name_q, name_w=name_w, name_ds=name_ds, latex=latex, **kwargs) + elif add_wy: + GM_Combo_Hom.__init__(self, y=y, x=x, yend=yend, q=q, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, + name_yend=name_yend, name_q=name_q, name_w=name_w, name_ds=name_ds, latex=latex, **kwargs) + else: + set_warn(self, 'Combination of arguments passed to GMM_Error not allowed. 
Using default arguments instead.') + GM_Error_Hom.__init__(self, y=y, x=x, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, + name_w=name_w, name_ds=name_ds, latex=latex) + elif estimator == 'kp98': + if yend is None and not add_wy: + GM_Error.__init__(self, y=y, x=x, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, + name_w=name_w, name_ds=name_ds, latex=latex, **kwargs) + elif yend is not None and not add_wy: + GM_Endog_Error.__init__(self, y=y, x=x, yend=yend, q=q, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, + name_yend=name_yend, name_q=name_q, name_w=name_w, name_ds=name_ds, latex=latex, **kwargs) + elif add_wy: + GM_Combo.__init__(self, y=y, x=x, yend=yend, q=q, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, + name_yend=name_yend, name_q=name_q, name_w=name_w, name_ds=name_ds, latex=latex, **kwargs) + else: + set_warn(self, 'Combination of arguments passed to GMM_Error not allowed. Using default arguments instead.') + GM_Error.__init__(self, y=y, x=x, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, + name_w=name_w, name_ds=name_ds, latex=latex) + else: + set_warn(self, 'Combination of arguments passed to GMM_Error not allowed. Using default arguments instead.') + GM_Error_Het.__init__(self, y=y, x=x, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, + name_w=name_w, name_ds=name_ds, latex=latex) def _momentsGM_Error(w, u): @@ -1103,16 +1301,30 @@ def _test(): if __name__ == "__main__": _test() - import libpysal + import numpy as np + import libpysal - dbf = libpysal.io.open(libpysal.examples.get_path("columbus.dbf"), "r") - y = np.array([dbf.by_col("HOVAL")]).T - names_to_extract = ["INC", "CRIME"] - x = np.array([dbf.by_col(name) for name in names_to_extract]).T - w = libpysal.io.open(libpysal.examples.get_path("columbus.gal"), "r").read() - w.transform = "r" - model = GM_Error( - y, x, w, name_y="hoval", name_x=["income", "crime"], name_ds="columbus" - ) - print(model.summary) + db = libpysal.io.open(libpysal.examples.get_path('columbus.dbf'),'r') + y = np.array(db.by_col("HOVAL")) + y = np.reshape(y, (49,1)) + X = [] + X.append(db.by_col("INC")) + X = np.array(X).T + yd = [] + yd.append(db.by_col("CRIME")) + yd = np.array(yd).T + q = [] + q.append(db.by_col("DISCBD")) + q = np.array(q).T + + w = libpysal.weights.Rook.from_shapefile(libpysal.examples.get_path("columbus.shp")) + w.transform = 'r' + #reg = GM_Error(y, X, w=w, name_x=['inc'], name_y='hoval', name_ds='columbus', vm=True) + #reg = GM_Endog_Error(y, X, yd, q, w=w, name_x=['inc'], name_y='hoval', name_yend=['crime'], + # name_q=['discbd'], name_ds='columbus',vm=True) + reg = GM_Combo(y, X, yd, q, w=w, name_x=['inc'], name_y='hoval', name_yend=['crime'], name_q=['discbd'], + name_ds='columbus', vm=True) + + print(reg.output) + print(reg.summary) \ No newline at end of file diff --git a/spreg/error_sp_het.py b/spreg/error_sp_het.py index f40a7ae9..50b838fb 100755 --- a/spreg/error_sp_het.py +++ b/spreg/error_sp_het.py @@ -12,12 +12,13 @@ import numpy.linalg as la from . import ols as OLS from . import user_output as USER -from . import summary_output as SUMMARY from . import twosls as TSLS from . 
import utils as UTILS -from .utils import RegressionPropsY, spdot, set_endog, sphstack, set_warn +from .utils import RegressionPropsY, spdot, set_endog, sphstack, set_warn, get_lags from scipy import sparse as SP from libpysal.weights.spatial_lag import lag_spatial +import pandas as pd +from .output import output, _summary_iteration, _spat_pseudo_r2 __all__ = ["GM_Error_Het", "GM_Endog_Error_Het", "GM_Combo_Het"] @@ -170,6 +171,9 @@ class GM_Error_Het(BaseGM_Error_Het): independent (exogenous) variable, excluding the constant w : pysal W object Spatial weights object + slx_lags : integer + Number of spatial lags of X to include in the model specification. + If slx_lags>0, the specification becomes of the SDEM type. max_iter : int Maximum number of iterations of steps 2a and 2b from :cite:`Arraiz2010`. Note: epsilon provides an additional @@ -191,9 +195,13 @@ class GM_Error_Het(BaseGM_Error_Het): Name of weights matrix for use in output name_ds : string Name of dataset for use in output + latex : boolean + Specifies if summary is to be printed in latex format Attributes ---------- + output : dataframe + regression results pandas dataframe summary : string Summary of regression results and diagnostics (note: use in conjunction with the print command) @@ -336,6 +344,7 @@ def __init__( y, x, w, + slx_lags=0, max_iter=1, epsilon=0.00001, step1c=False, @@ -344,6 +353,7 @@ def __init__( name_x=None, name_w=None, name_ds=None, + latex=False, ): n = USER.check_arrays(y, x) @@ -351,6 +361,12 @@ def __init__( USER.check_weights(w, y, w_required=True) x_constant, name_x, warn = USER.check_constant(x, name_x) set_warn(self, warn) + self.title = "GM SPATIALLY WEIGHTED LEAST SQUARES (HET)" + if slx_lags >0: + lag_x = get_lags(w, x_constant[:, 1:], slx_lags) + x_constant = np.hstack((x_constant, lag_x)) + name_x += USER.set_name_spatial_lags(name_x, slx_lags) + self.title += " WITH SLX (SDEM)" BaseGM_Error_Het.__init__( self, y, @@ -360,13 +376,16 @@ def __init__( step1c=step1c, epsilon=epsilon, ) - self.title = "SPATIALLY WEIGHTED LEAST SQUARES (HET)" self.name_ds = USER.set_name_ds(name_ds) self.name_y = USER.set_name_y(name_y) self.name_x = USER.set_name_x(name_x, x_constant) self.name_x.append("lambda") self.name_w = USER.set_name_w(name_w, w) - SUMMARY.GM_Error_Het(reg=self, w=w, vm=vm) + self.output = pd.DataFrame(self.name_x, columns=['var_names']) + self.output['var_type'] = ['x'] * (len(self.name_x)-1) + ['lambda'] + self.output['regime'], self.output['equation'] = (0, 0) + self.other_top = _summary_iteration(self) + output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) class BaseGM_Endog_Error_Het(RegressionPropsY): @@ -582,6 +601,9 @@ class GM_Endog_Error_Het(BaseGM_Endog_Error_Het): this should not contain any variables from x) w : pysal W object Spatial weights object + slx_lags : integer + Number of spatial lags of X to include in the model specification. + If slx_lags>0, the specification becomes of the SDEM type. max_iter : int Maximum number of iterations of steps 2a and 2b from :cite:`Arraiz2010`. 
Note: epsilon provides an additional @@ -611,9 +633,13 @@ class GM_Endog_Error_Het(BaseGM_Endog_Error_Het): Name of weights matrix for use in output name_ds : string Name of dataset for use in output + latex : boolean + Specifies if summary is to be printed in latex format Attributes ---------- + output : dataframe + regression results pandas dataframe summary : string Summary of regression results and diagnostics (note: use in conjunction with the print command) @@ -788,6 +814,7 @@ def __init__( yend, q, w, + slx_lags=0, max_iter=1, epsilon=0.00001, step1c=False, @@ -799,6 +826,7 @@ def __init__( name_q=None, name_w=None, name_ds=None, + latex=False, ): n = USER.check_arrays(y, x, yend, q) @@ -806,6 +834,12 @@ def __init__( USER.check_weights(w, y, w_required=True) x_constant, name_x, warn = USER.check_constant(x, name_x) set_warn(self, warn) + self.title = "GM SPATIALLY WEIGHTED TWO STAGE LEAST SQUARES (HET)" + if slx_lags > 0: + lag_x = get_lags(w, x_constant[:, 1:], slx_lags) + x_constant = np.hstack((x_constant, lag_x)) + name_x += USER.set_name_spatial_lags(name_x, slx_lags) + self.title += " WITH SLX (SDEM)" BaseGM_Endog_Error_Het.__init__( self, y=y, @@ -818,7 +852,6 @@ def __init__( epsilon=epsilon, inv_method=inv_method, ) - self.title = "SPATIALLY WEIGHTED TWO STAGE LEAST SQUARES (HET)" self.name_ds = USER.set_name_ds(name_ds) self.name_y = USER.set_name_y(name_y) self.name_x = USER.set_name_x(name_x, x_constant) @@ -828,7 +861,12 @@ def __init__( self.name_q = USER.set_name_q(name_q, q) self.name_h = USER.set_name_h(self.name_x, self.name_q) self.name_w = USER.set_name_w(name_w, w) - SUMMARY.GM_Endog_Error_Het(reg=self, w=w, vm=vm) + self.output = pd.DataFrame(self.name_z, + columns=['var_names']) + self.output['var_type'] = ['x'] * len(self.name_x) + ['yend'] * len(self.name_yend) + ['lambda'] + self.output['regime'], self.output['equation'] = (0, 0) + self.other_top = _summary_iteration(self) + output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) class BaseGM_Combo_Het(BaseGM_Endog_Error_Het): @@ -1026,6 +1064,10 @@ class GM_Combo_Het(BaseGM_Combo_Het): Orders of W to include as instruments for the spatially lagged dependent variable. For example, w_lags=1, then instruments are WX; if w_lags=2, then WX, WWX; and so on. + slx_lags : integer + Number of spatial lags of X to include in the model specification. + If slx_lags>0, the specification becomes of the General Nesting + Spatial Model (GNSM) type. lag_q : boolean If True, then include spatial lags of the additional instruments (q). 
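A minimal, illustrative sketch of the slx_lags option documented above (columbus sample data shipped with libpysal; the variable choices are for demonstration only):

import numpy as np
import libpysal
from spreg import GM_Combo_Het

db = libpysal.io.open(libpysal.examples.get_path('columbus.dbf'), 'r')
y = np.array(db.by_col("HOVAL")).reshape(49, 1)
x = np.array([db.by_col("INC")]).T
w = libpysal.weights.Rook.from_shapefile(libpysal.examples.get_path("columbus.shp"))
w.transform = 'r'
# slx_lags=1 appends the spatial lag of each X column (here W_INC), so the
# lag+error combo becomes a General Nesting Spatial Model (GNSM) specification
reg = GM_Combo_Het(y, x, w=w, slx_lags=1,
                   name_y='hoval', name_x=['inc'], name_ds='columbus')
print(reg.output)   # pandas results DataFrame (the 'output' attribute described above)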
@@ -1058,9 +1100,13 @@ class GM_Combo_Het(BaseGM_Combo_Het): Name of weights matrix for use in output name_ds : string Name of dataset for use in output + latex : boolean + Specifies if summary is to be printed in latex format Attributes ---------- + output : dataframe + regression results pandas dataframe summary : string Summary of regression results and diagnostics (note: use in conjunction with the print command) @@ -1255,6 +1301,7 @@ def __init__( q=None, w=None, w_lags=1, + slx_lags=0, lag_q=True, max_iter=1, epsilon=0.00001, @@ -1267,6 +1314,7 @@ def __init__( name_q=None, name_w=None, name_ds=None, + latex=False, ): n = USER.check_arrays(y, x, yend, q) @@ -1274,7 +1322,13 @@ def __init__( USER.check_weights(w, y, w_required=True) x_constant, name_x, warn = USER.check_constant(x, name_x) set_warn(self, warn) - yend2, q2 = set_endog(y, x_constant[:, 1:], w, yend, q, w_lags, lag_q) + + if slx_lags == 0: + yend2, q2 = set_endog(y, x_constant[:, 1:], w, yend, q, w_lags, lag_q) + else: + yend2, q2, wx = set_endog(y, x_constant[:, 1:], w, yend, q, w_lags, lag_q, slx_lags) + x_constant = np.hstack((x_constant, wx)) + BaseGM_Combo_Het.__init__( self, y=y, @@ -1294,7 +1348,10 @@ def __init__( w, self.y, self.predy, yend2[:, -1].reshape(self.n, 1), self.rho ) UTILS.set_warn(self, warn) - self.title = "SPATIALLY WEIGHTED TWO STAGE LEAST SQUARES (HET)" + self.title = "SPATIALLY WEIGHTED 2SLS- GM-COMBO MODEL (HET)" + if slx_lags > 0: + name_x += USER.set_name_spatial_lags(name_x, slx_lags) + self.title += " WITH SLX (GNSM)" self.name_ds = USER.set_name_ds(name_ds) self.name_y = USER.set_name_y(name_y) self.name_x = USER.set_name_x(name_x, x_constant) @@ -1306,7 +1363,13 @@ def __init__( self.name_q.extend(USER.set_name_q_sp(self.name_x, w_lags, self.name_q, lag_q)) self.name_h = USER.set_name_h(self.name_x, self.name_q) self.name_w = USER.set_name_w(name_w, w) - SUMMARY.GM_Combo_Het(reg=self, w=w, vm=vm) + self.output = pd.DataFrame(self.name_z, + columns=['var_names']) + self.output['var_type'] = ['x'] * len(self.name_x) + ['yend'] * (len(self.name_yend)-1) + ['rho', 'lambda'] + self.output['regime'], self.output['equation'] = (0, 0) + self.other_top = _spat_pseudo_r2(self) + self.other_top += _summary_iteration(self) + output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) # Functions @@ -1551,3 +1614,28 @@ def _test(): if __name__ == "__main__": _test() + import numpy as np + import libpysal + + db = libpysal.io.open(libpysal.examples.get_path('columbus.dbf'),'r') + y = np.array(db.by_col("HOVAL")) + y = np.reshape(y, (49,1)) + X = [] + X.append(db.by_col("INC")) + X = np.array(X).T + yd = [] + yd.append(db.by_col("CRIME")) + yd = np.array(yd).T + q = [] + q.append(db.by_col("DISCBD")) + q = np.array(q).T + + w = libpysal.weights.Rook.from_shapefile(libpysal.examples.get_path("columbus.shp")) + w.transform = 'r' + # reg = GM_Error_Het(y, X, w=w, name_x=['inc'], name_y='hoval', name_ds='columbus', vm=True) + # reg = GM_Endog_Error_Het(y, X, yd, q, w=w, name_x=['inc'], name_y='hoval', name_yend=['crime'], + # name_q=['discbd'], name_ds='columbus',vm=True) + reg = GM_Combo_Het(y, X, yd, q, w=w, step1c=True, name_x=['inc'], name_y='hoval', name_yend=['crime'], name_q=['discbd'], name_ds='columbus', + vm=True) + print(reg.output) + print(reg.summary) \ No newline at end of file diff --git a/spreg/error_sp_het_regimes.py b/spreg/error_sp_het_regimes.py index 9c5a4586..2f812d74 100644 --- a/spreg/error_sp_het_regimes.py +++ b/spreg/error_sp_het_regimes.py @@ -6,7 +6,6 @@ import numpy as 
np import multiprocessing as mp from . import user_output as USER -from . import summary_output as SUMMARY from . import utils as UTILS from . import regimes as REGI from .ols import BaseOLS @@ -22,10 +21,12 @@ get_vc_het_tsls, get_Omega_GS2SLS, ) -from .utils import RegressionPropsY, spdot, set_endog, sphstack, set_warn, sp_att +from .utils import RegressionPropsY, spdot, set_endog, sphstack, set_warn, sp_att, get_lags from scipy import sparse as SP from libpysal.weights.spatial_lag import lag_spatial from platform import system +import pandas as pd +from .output import output, _summary_iteration, _spat_pseudo_r2 class GM_Error_Het_Regimes(RegressionPropsY, REGI.Regimes_Frame): @@ -64,6 +65,9 @@ class GM_Error_Het_Regimes(RegressionPropsY, REGI.Regimes_Frame): If True, a separate regression is run for each regime. regime_lag_sep: boolean Always False, kept for consistency, ignored. + slx_lags : integer + Number of spatial lags of X to include in the model specification. + If slx_lags>0, the specification becomes of the SDEM type. max_iter : int Maximum number of iterations of steps 2a and 2b from Arraiz et al. Note: epsilon provides an additional stop condition. @@ -90,9 +94,13 @@ class GM_Error_Het_Regimes(RegressionPropsY, REGI.Regimes_Frame): Name of dataset for use in output name_regimes : string Name of regime variable for use in the output + latex : boolean + Specifies if summary is to be printed in latex format Attributes ---------- + output : dataframe + regression results pandas dataframe summary : string Summary of regression results and diagnostics (note: use in conjunction with the print command) @@ -304,6 +312,7 @@ def __init__( cols2regi="all", regime_err_sep=False, regime_lag_sep=False, + slx_lags=0, cores=False, vm=False, name_y=None, @@ -311,6 +320,7 @@ def __init__( name_w=None, name_ds=None, name_regimes=None, + latex=False, ): n = USER.check_arrays(y, x) @@ -329,6 +339,12 @@ def __init__( x_constant, name_x, warn = USER.check_constant(x, name_x, just_rem=True) set_warn(self, warn) name_x = USER.set_name_x(name_x, x_constant, constant=True) + + if slx_lags >0: + lag_x = get_lags(w, x_constant, slx_lags) + x_constant = np.hstack((x_constant, lag_x)) + name_x += USER.set_name_spatial_lags(name_x, slx_lags) + self.name_x_r = USER.set_name_x(name_x, x_constant) cols2regi = REGI.check_cols2regi(constant_regi, cols2regi, x_constant) @@ -339,11 +355,12 @@ def __init__( if regime_err_sep == True: if set(cols2regi) == set([True]): - self._error_regimes_multi( + self._error_het_regimes_multi( y, x_constant, regimes, w, + slx_lags, cores, max_iter, epsilon, @@ -351,21 +368,23 @@ def __init__( cols2regi, vm, name_x, + latex, ) else: raise Exception( - "All coefficients must vary accross regimes if regime_err_sep = True." + "All coefficients must vary across regimes if regime_err_sep = True." 
) else: x_constant = sphstack(np.ones((x_constant.shape[0], 1)), x_constant) name_x = USER.set_name_x(name_x, x_constant) - self.x, self.name_x = REGI.Regimes_Frame.__init__( + self.x, self.name_x, x_rlist = REGI.Regimes_Frame.__init__( self, x_constant, regimes, constant_regi=None, cols2regi=cols2regi, names=name_x, + rlist=True, ) ols = BaseOLS(y=y, x=self.x) self.k = ols.x.shape[1] @@ -412,16 +431,25 @@ def __init__( self.vm = get_vm_het(moments_i[0], lambda3, self, w.sparse, vc3) self.betas = np.vstack((ols_s.betas, lambda3)) self.e_filtered = self.u - lambda3 * lag_spatial(w, self.u) - self.title = "SPATIALLY WEIGHTED LEAST SQUARES (HET) - REGIMES" + if slx_lags == 0: + self.title = "GM SPATIALLY WEIGHTED MODEL (HET) - REGIMES" + else: + self.title = "GM SPATIALLY WEIGHTED MODEL + SLX (SDEM-HET) - REGIMES" + self.name_x.append("lambda") self.kf += 1 self.chow = REGI.Chow(self) self._cache = {} - - SUMMARY.GM_Error_Het(reg=self, w=w, vm=vm, regimes=True) - - def _error_regimes_multi( - self, y, x, regimes, w, cores, max_iter, epsilon, step1c, cols2regi, vm, name_x + self.output = pd.DataFrame(self.name_x, + columns=['var_names']) + self.output['var_type'] = ['x']*(len(self.name_x)-1)+['lambda'] + self.output['regime'] = x_rlist + ['_Global'] + self.output['equation'] = 0 + self.other_top = _summary_iteration(self) + output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) + + def _error_het_regimes_multi( + self, y, x, regimes, w, slx_lags, cores, max_iter, epsilon, step1c, cols2regi, vm, name_x, latex ): regi_ids = dict( @@ -459,6 +487,7 @@ def _error_regimes_multi( name_x + ["lambda"], self.name_w, self.name_regimes, + slx_lags, ), ) else: @@ -477,6 +506,7 @@ def _error_regimes_multi( name_x + ["lambda"], self.name_w, self.name_regimes, + slx_lags, ) ) @@ -501,6 +531,7 @@ def _error_regimes_multi( results = {} self.name_y, self.name_x = [], [] counter = 0 + self.output = pd.DataFrame(columns=['var_names', 'var_type', 'regime', 'equation']) for r in self.regimes_set: """ if is_win: @@ -530,11 +561,17 @@ def _error_regimes_multi( regi_ids[r], ] = results[r].e_filtered self.name_y += results[r].name_y + results[r].name_x = [str(r) + '_lambda' if value == 'lambda' else value for value in results[r].name_x] self.name_x += results[r].name_x + results[r].other_top = _summary_iteration(results[r]) + self.output = pd.concat([self.output, pd.DataFrame({'var_names': results[r].name_x, + 'var_type': ['x'] * (len(results[r].name_x)-1) + + ['lambda'], + 'regime': r, 'equation': r})], ignore_index=True) counter += 1 self.chow = REGI.Chow(self) self.multi = results - SUMMARY.GM_Error_Het_multi(reg=self, multireg=self.multi, vm=vm, regimes=True) + output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) class GM_Endog_Error_Het_Regimes(RegressionPropsY, REGI.Regimes_Frame): @@ -581,6 +618,9 @@ class GM_Endog_Error_Het_Regimes(RegressionPropsY, REGI.Regimes_Frame): If True, a separate regression is run for each regime. regime_lag_sep : boolean Always False, kept for consistency, ignored. + slx_lags : integer + Number of spatial lags of X to include in the model specification. + If slx_lags>0, the specification becomes of the SDEM type. max_iter : int Maximum number of iterations of steps 2a and 2b from :cite:`Arraiz2010`. Note: epsilon provides an additional stop condition. 
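For the regimes variant, a minimal sketch (columbus data with its NSA column as the regime indicator, mirroring the commented examples in the __main__ block added at the bottom of this file; variable choices are illustrative):

import numpy as np
import libpysal
from spreg import GM_Endog_Error_Het_Regimes

db = libpysal.io.open(libpysal.examples.get_path('columbus.dbf'), 'r')
y = np.array(db.by_col("HOVAL")).reshape(49, 1)
x = np.array([db.by_col("INC")]).T
yend = np.array([db.by_col("CRIME")]).T       # endogenous regressor
q = np.array([db.by_col("DISCBD")]).T         # external instrument
regimes = db.by_col('NSA')                    # one regime label per observation
w = libpysal.weights.Rook.from_shapefile(libpysal.examples.get_path("columbus.shp"))
w.transform = 'r'
reg = GM_Endog_Error_Het_Regimes(y, x, yend, q, regimes, w=w,
                                 name_y='hoval', name_x=['inc'],
                                 name_yend=['crime'], name_q=['discbd'],
                                 name_ds='columbus')
# coefficients are reported per regime (e.g. '0_inc', '1_inc'), lambda as '_Global'
print(reg.summary)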
@@ -615,9 +655,13 @@ class GM_Endog_Error_Het_Regimes(RegressionPropsY, REGI.Regimes_Frame): Name of dataset for use in output name_regimes : string Name of regime variable for use in the output + latex : boolean + Specifies if summary is to be printed in latex format Attributes ---------- + output : dataframe + regression results pandas dataframe summary : string Summary of regression results and diagnostics (note: use in conjunction with the print command) @@ -869,6 +913,7 @@ def __init__( cols2regi="all", regime_err_sep=False, regime_lag_sep=False, + slx_lags=0, inv_method="power_exp", cores=False, vm=False, @@ -881,6 +926,7 @@ def __init__( name_regimes=None, summ=True, add_lag=False, + latex=False, ): n = USER.check_arrays(y, x, yend, q) @@ -898,12 +944,16 @@ def __init__( set_warn(self, warn) name_x = USER.set_name_x(name_x, x_constant, constant=True) + if slx_lags > 0: + lag_x = get_lags(w, x_constant, slx_lags) + x_constant = np.hstack((x_constant, lag_x)) + name_x += USER.set_name_spatial_lags(name_x, slx_lags) + if summ: name_yend = USER.set_name_yend(name_yend, yend) self.name_y = USER.set_name_y(name_y) name_q = USER.set_name_q(name_q, q) self.name_x_r = USER.set_name_x(name_x, x_constant) + name_yend - cols2regi = REGI.check_cols2regi( constant_regi, cols2regi, x_constant, yend=yend ) @@ -914,13 +964,14 @@ def __init__( if regime_err_sep == True: if set(cols2regi) == set([True]): - self._endog_error_regimes_multi( + self._endog_error_het_regimes_multi( y, x_constant, regimes, w, yend, q, + slx_lags, cores, max_iter, epsilon, @@ -932,10 +983,11 @@ def __init__( name_yend, name_q, add_lag, + latex, ) else: raise Exception( - "All coefficients must vary accross regimes if regime_err_sep = True." + "All coefficients must vary across regimes if regime_err_sep = True." ) else: x_constant = sphstack(np.ones((x_constant.shape[0], 1)), x_constant) @@ -943,15 +995,16 @@ def __init__( q, name_q = REGI.Regimes_Frame.__init__( self, q, regimes, constant_regi=None, cols2regi="all", names=name_q ) - x, name_x = REGI.Regimes_Frame.__init__( + x, name_x, x_rlist = REGI.Regimes_Frame.__init__( self, x_constant, regimes, constant_regi=None, cols2regi=cols2regi, names=name_x, + rlist=True, ) - yend2, name_yend = REGI.Regimes_Frame.__init__( + yend2, name_yend, yend_rlist = REGI.Regimes_Frame.__init__( self, yend, regimes, @@ -959,6 +1012,7 @@ def __init__( cols2regi=cols2regi, yend=True, names=name_yend, + rlist=True, ) # 1a. 
S2SLS --> \tilde{\delta} @@ -1047,13 +1101,20 @@ def __init__( self.kf += 1 self.chow = REGI.Chow(self) self._cache = {} + self.output = pd.DataFrame(self.name_z, + columns=['var_names']) + self.output['var_type'] = ['x']*len(self.name_x)+['yend']*len(self.name_yend)+['lambda'] + self.output['regime'] = x_rlist + yend_rlist + ['_Global'] + self.output['equation'] = 0 if summ: - self.title = ( - "SPATIALLY WEIGHTED TWO STAGE LEAST SQUARES (HET) - REGIMES" - ) - SUMMARY.GM_Endog_Error_Het(reg=self, w=w, vm=vm, regimes=True) - - def _endog_error_regimes_multi( + self.other_top = _summary_iteration(self) + if slx_lags == 0: + self.title = ("GM SPATIALLY WEIGHTED 2SLS (HET) - REGIMES") + else: + self.title = ("GM SPATIALLY WEIGHTED 2SLS + SLX (SDEM-HET) - REGIMES") + output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) + + def _endog_error_het_regimes_multi( self, y, x, @@ -1061,6 +1122,7 @@ def _endog_error_regimes_multi( w, yend, q, + slx_lags, cores, max_iter, epsilon, @@ -1072,6 +1134,7 @@ def _endog_error_regimes_multi( name_yend, name_q, add_lag, + latex, ): regi_ids = dict( @@ -1120,6 +1183,7 @@ def _endog_error_regimes_multi( self.name_w, self.name_regimes, add_lag, + slx_lags, ), ) else: @@ -1144,6 +1208,7 @@ def _endog_error_regimes_multi( self.name_w, self.name_regimes, add_lag, + slx_lags, ) ) @@ -1174,6 +1239,7 @@ def _endog_error_regimes_multi( self.name_h, ) = ([], [], [], [], [], []) counter = 0 + self.output = pd.DataFrame(columns=['var_names', 'var_type', 'regime', 'equation']) for r in self.regimes_set: """ if is_win: @@ -1215,17 +1281,19 @@ def _endog_error_regimes_multi( self.e_pred[ regi_ids[r], ] = results[r].e_pred + results[r].other_top = _spat_pseudo_r2(results[r]) + v_type = ['x'] * len(results[r].name_x) + ['yend'] * (len(results[r].name_yend)-1) + ['rho','lambda'] + else: + results[r].other_top = "" + v_type = ['x'] * len(results[r].name_x) + ['yend'] * len(results[r].name_yend) + ['lambda'] + results[r].other_top += _summary_iteration(results[r]) + self.output = pd.concat([self.output, pd.DataFrame({'var_names': results[r].name_z, + 'var_type': v_type, + 'regime': r, 'equation': r})], ignore_index=True) counter += 1 self.chow = REGI.Chow(self) self.multi = results - if add_lag != False: - SUMMARY.GM_Combo_Het_multi( - reg=self, multireg=self.multi, vm=vm, regimes=True - ) - else: - SUMMARY.GM_Endog_Error_Het_multi( - reg=self, multireg=self.multi, vm=vm, regimes=True - ) + output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) class GM_Combo_Het_Regimes(GM_Endog_Error_Het_Regimes): @@ -1274,6 +1342,9 @@ class GM_Combo_Het_Regimes(GM_Endog_Error_Het_Regimes): If True, the spatial parameter for spatial lag is also computed according to different regimes. If False (default), the spatial parameter is fixed across regimes. + slx_lags : integer + Number of spatial lags of X to include in the model specification. + If slx_lags>0, the specification becomes of the GNSM type. w_lags : integer Orders of W to include as instruments for the spatially lagged dependent variable. 
For example, w_lags=1, then @@ -1315,9 +1386,13 @@ class GM_Combo_Het_Regimes(GM_Endog_Error_Het_Regimes): Name of dataset for use in output name_regimes : string Name of regime variable for use in the output + latex : boolean + Specifies if summary is to be printed in latex format Attributes ---------- + output : dataframe + regression results pandas dataframe summary : string Summary of regression results and diagnostics (note: use in conjunction with the print command) @@ -1595,6 +1670,7 @@ def __init__( yend=None, q=None, w=None, + slx_lags=0, w_lags=1, lag_q=True, max_iter=1, @@ -1614,8 +1690,14 @@ def __init__( name_w=None, name_ds=None, name_regimes=None, + latex=False, ): - + if regime_lag_sep and not regime_err_sep: + set_warn(self, "regime_err_sep set to True when regime_lag_sep=True.") + regime_err_sep = True + if regime_err_sep and not regime_lag_sep: + set_warn(self, "regime_err_sep set to False when regime_lag_sep=False.") + regime_err_sep = False n = USER.check_arrays(y, x) self.step1c = step1c y = USER.check_y(y, n) @@ -1623,16 +1705,27 @@ def __init__( x_constant, name_x, warn = USER.check_constant(x, name_x, just_rem=True) set_warn(self, warn) name_x = USER.set_name_x(name_x, x_constant, constant=True) - self.name_x_r = USER.set_name_x(name_x, x_constant) self.name_y = USER.set_name_y(name_y) name_yend = USER.set_name_yend(name_yend, yend) name_q = USER.set_name_q(name_q, q) - name_q.extend(USER.set_name_q_sp(name_x, w_lags, name_q, lag_q, force_all=True)) - cols2regi = REGI.check_cols2regi( - constant_regi, cols2regi, x_constant, yend=yend, add_cons=False - ) + if regime_err_sep and any(col != True for col in cols2regi): + set_warn(self, "All coefficients must vary across regimes if regime_err_sep = True, so setting cols2regi = 'all'.") + cols2regi = "all" + + if slx_lags > 0: + yend2, q2, wx = set_endog(y, x_constant, w, yend, q, w_lags, lag_q, slx_lags) + x_constant = np.hstack((x_constant, wx)) + name_slx = USER.set_name_spatial_lags(name_x, slx_lags) + name_q.extend(USER.set_name_q_sp(name_slx[-len(name_x):], w_lags, name_q, lag_q, force_all=True)) + name_x += name_slx + cols2regi = REGI.check_cols2regi(constant_regi, cols2regi, x_constant[:, :-1], yend=yend2, add_cons=False) + else: + name_q.extend(USER.set_name_q_sp(name_x, w_lags, name_q, lag_q, force_all=True)) + yend2, q2 = yend, q + cols2regi = REGI.check_cols2regi(constant_regi, cols2regi, x_constant, yend=yend2, add_cons=False) + self.regimes_set = REGI._get_regimes_set(regimes) self.regimes = regimes USER.check_regimes(self.regimes_set, n, x_constant.shape[1]) @@ -1640,27 +1733,25 @@ def __init__( self.regime_lag_sep = regime_lag_sep if regime_lag_sep == True: - if regime_err_sep == False: - raise Exception( - "For spatial combo models, if spatial lag is set by regimes (regime_lag_sep=True), spatial error must also be set by regimes (regime_err_sep=True)." - ) - add_lag = [w_lags, lag_q] + if slx_lags == 0: + add_lag = [w_lags, lag_q] + else: + cols2regi += [True] + add_lag = False else: - cols2regi += [False] add_lag = False - if regime_err_sep == True: - raise Exception( - "For spatial combo models, if spatial error is set by regimes (regime_err_sep=True), all coefficients including lambda (regime_lag_sep=True) must be set by regimes." 
- ) - yend, q = set_endog(y, x_constant, w, yend, q, w_lags, lag_q) + cols2regi += [False] + if slx_lags == 0: + yend2, q2 = set_endog(y, x_constant, w, yend2, q2, w_lags, lag_q) + name_yend.append(USER.set_name_yend_sp(self.name_y)) GM_Endog_Error_Het_Regimes.__init__( self, y=y, x=x_constant, - yend=yend, - q=q, + yend=yend2, + q=q2, regimes=regimes, w=w, constant_regi=constant_regi, @@ -1681,17 +1772,24 @@ def __init__( name_regimes=name_regimes, summ=False, add_lag=add_lag, + latex=latex, ) if regime_err_sep != True: self.rho = self.betas[-2] self.predy_e, self.e_pred, warn = UTILS.sp_att( - w, self.y, self.predy, yend[:, -1].reshape(self.n, 1), self.rho + w, self.y, self.predy, yend2[:, -1].reshape(self.n, 1), self.rho ) UTILS.set_warn(self, warn) self.regime_lag_sep = regime_lag_sep - self.title = "SPATIALLY WEIGHTED TWO STAGE LEAST SQUARES (HET) - REGIMES" - SUMMARY.GM_Combo_Het(reg=self, w=w, vm=vm, regimes=True) + if slx_lags == 0: + self.title = "GM SPATIALLY WEIGHTED 2SLS-COMBO MODEL (HET) - REGIMES" + else: + self.title = "GM SPATIALLY WEIGHTED 2SLS-COMBO + SLX (GNSM-HET) - REGIMES" + self.output.iat[-2, self.output.columns.get_loc('var_type')] = 'rho' + self.other_top = _spat_pseudo_r2(self) + self.other_top += _summary_iteration(self) + output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) def _work_error( @@ -1708,6 +1806,7 @@ def _work_error( name_x, name_w, name_regimes, + slx_lags, ): w_r, warn = REGI.w_regime(w, regi_ids[r], r, transform=True) y_r = y[regi_ids[r]] @@ -1717,7 +1816,10 @@ def _work_error( ) set_warn(model, warn) model.w = w_r - model.title = "SPATIALLY WEIGHTED LEAST SQUARES ESTIMATION (HET) - REGIME %s" % r + if slx_lags == 0: + model.title = "GM SPATIALLY WEIGHTED LS (HET) - REGIME %s" % r + else: + model.title = "GM SPATIALLY WEIGHTED LS + SLX (SDEM-HET) - REGIME %s" % r model.name_ds = name_ds model.name_y = "%s_%s" % (str(r), name_y) model.name_x = ["%s_%s" % (str(r), i) for i in name_x] @@ -1746,6 +1848,7 @@ def _work_endog_error( name_w, name_regimes, add_lag, + slx_lags, ): w_r, warn = REGI.w_regime(w, regi_ids[r], r, transform=True) y_r = y[regi_ids[r]] @@ -1777,12 +1880,22 @@ def _work_endog_error( w_r, model.y, model.predy, model.yend[:, -1].reshape(model.n, 1), model.rho ) set_warn(model, warn) - model.title = "SPATIALLY WEIGHTED TWO STAGE LEAST SQUARES (HET) - REGIME %s" % r + + if slx_lags == 0: + if add_lag != False: + model.title = "GM SPATIALLY WEIGHTED 2SLS-COMBO MODEL (HET)- REGIME %s" % r + else: + model.title = "GM SPATIALLY WEIGHTED 2SLS (HET) - REGIME %s" % r + else: + if add_lag != False: + model.title = "GM SPATIAL COMBO MODEL + SLX (GNSM-HET) - REGIME %s" % r + else: + model.title = "GM SPATIALLY WEIGHTED 2SLS + SLX (SDEM-HET) - REGIME %s" % r model.name_ds = name_ds model.name_y = "%s_%s" % (str(r), name_y) model.name_x = ["%s_%s" % (str(r), i) for i in name_x] model.name_yend = ["%s_%s" % (str(r), i) for i in name_yend] - model.name_z = model.name_x + model.name_yend + ["lambda"] + model.name_z = model.name_x + model.name_yend + [str(r)+"lambda"] model.name_q = ["%s_%s" % (str(r), i) for i in name_q] model.name_h = model.name_x + model.name_q model.name_w = name_w @@ -1801,3 +1914,33 @@ def _test(): if __name__ == "__main__": _test() + + import numpy as np + import libpysal + + db = libpysal.io.open(libpysal.examples.get_path('columbus.dbf'),'r') + y = np.array(db.by_col("HOVAL")) + y = np.reshape(y, (49,1)) + X = [] + X.append(db.by_col("INC")) + X = np.array(X).T + yd = [] + yd.append(db.by_col("CRIME")) + yd = 
np.array(yd).T + q = [] + q.append(db.by_col("DISCBD")) + q = np.array(q).T + + r_var = 'NSA' + regimes = db.by_col(r_var) + + w = libpysal.weights.Rook.from_shapefile(libpysal.examples.get_path("columbus.shp")) + w.transform = 'r' + #reg = GM_Error_Het_Regimes(y, X, regimes, w=w, name_x=['inc'], name_y='hoval', name_ds='columbus', vm=True, + # regime_err_sep=True) + #reg = GM_Endog_Error_Het_Regimes(y, X, yd, q, regimes, w=w, name_x=['inc'], name_y='hoval', name_yend=['crime'], + # name_q=['discbd'], name_ds='columbus',vm=True, regime_err_sep=True) + reg = GM_Combo_Het_Regimes(y, X, regimes, yd, q, w=w, step1c=True, name_x=['inc'], name_y='hoval', name_yend=['crime'], + name_q=['discbd'], name_ds='columbus', vm=True, regime_err_sep=False, regime_lag_sep=False) + print(reg.output) + print(reg.summary) \ No newline at end of file diff --git a/spreg/error_sp_hom.py b/spreg/error_sp_hom.py index 5a4d8042..6208b8af 100644 --- a/spreg/error_sp_hom.py +++ b/spreg/error_sp_hom.py @@ -10,14 +10,14 @@ import numpy as np from numpy import linalg as la from . import ols as OLS -from libpysal.weights.spatial_lag import lag_spatial -from .utils import power_expansion, set_endog, iter_msg, sp_att +from .utils import set_endog, iter_msg, sp_att from .utils import get_A1_hom, get_A2_hom, get_A1_het, optim_moments -from .utils import get_spFilter, get_lags, _moments2eqs +from .utils import get_spFilter, get_lags from .utils import spdot, RegressionPropsY, set_warn from . import twosls as TSLS from . import user_output as USER -from . import summary_output as SUMMARY +import pandas as pd +from .output import output, _spat_pseudo_r2, _summary_iteration __all__ = ["GM_Error_Hom", "GM_Endog_Error_Hom", "GM_Combo_Hom"] @@ -178,6 +178,9 @@ class GM_Error_Hom(BaseGM_Error_Hom): independent (exogenous) variable, excluding the constant w : pysal W object Spatial weights object + slx_lags : integer + Number of spatial lags of X to include in the model specification. + If slx_lags>0, the specification becomes of the SDEM type. max_iter : int Maximum number of iterations of steps 2a and 2b from :cite:`Arraiz2010`. Note: epsilon provides an additional stop condition. 
@@ -201,10 +204,13 @@ class GM_Error_Hom(BaseGM_Error_Hom): Name of weights matrix for use in output name_ds : string Name of dataset for use in output - + latex : boolean + Specifies if summary is to be printed in latex format Attributes ---------- + output : dataframe + regression results pandas dataframe summary : string Summary of regression results and diagnostics (note: use in conjunction with the print command) @@ -341,6 +347,7 @@ def __init__( y, x, w, + slx_lags=0, max_iter=1, epsilon=0.00001, A1="hom_sc", @@ -349,6 +356,7 @@ def __init__( name_x=None, name_w=None, name_ds=None, + latex=False, ): n = USER.check_arrays(y, x) @@ -356,6 +364,12 @@ def __init__( USER.check_weights(w, y, w_required=True) x_constant, name_x, warn = USER.check_constant(x, name_x) set_warn(self, warn) + self.title = "GM SPATIALLY WEIGHTED LEAST SQUARES (HOM)" + if slx_lags >0: + lag_x = get_lags(w, x_constant[:, 1:], slx_lags) + x_constant = np.hstack((x_constant, lag_x)) + name_x += USER.set_name_spatial_lags(name_x, slx_lags) + self.title += " WITH SLX (SDEM)" BaseGM_Error_Hom.__init__( self, y=y, @@ -365,13 +379,17 @@ def __init__( max_iter=max_iter, epsilon=epsilon, ) - self.title = "SPATIALLY WEIGHTED LEAST SQUARES (HOM)" self.name_ds = USER.set_name_ds(name_ds) self.name_y = USER.set_name_y(name_y) self.name_x = USER.set_name_x(name_x, x_constant) self.name_x.append("lambda") self.name_w = USER.set_name_w(name_w, w) - SUMMARY.GM_Error_Hom(reg=self, w=w, vm=vm) + self.A1 = A1 + self.output = pd.DataFrame(self.name_x, columns=['var_names']) + self.output['var_type'] = ['x'] * (len(self.name_x) - 1) + ['lambda'] + self.output['regime'], self.output['equation'] = (0, 0) + self.other_top = _summary_iteration(self) + output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) class BaseGM_Endog_Error_Hom(RegressionPropsY): @@ -564,6 +582,9 @@ class GM_Endog_Error_Hom(BaseGM_Endog_Error_Hom): this should not contain any variables from x) w : pysal W object Spatial weights object + slx_lags : integer + Number of spatial lags of X to include in the model specification. + If slx_lags>0, the specification becomes of the SDEM type. max_iter : int Maximum number of iterations of steps 2a and 2b from :cite:`Arraiz2010`. Note: epsilon provides an additional stop condition. 
@@ -591,9 +612,13 @@ class GM_Endog_Error_Hom(BaseGM_Endog_Error_Hom): Name of weights matrix for use in output name_ds : string Name of dataset for use in output + latex : boolean + Specifies if summary is to be printed in latex format Attributes ---------- + output : dataframe + regression results pandas dataframe summary : string Summary of regression results and diagnostics (note: use in conjunction with the print command) @@ -769,6 +794,7 @@ def __init__( yend, q, w, + slx_lags=0, max_iter=1, epsilon=0.00001, A1="hom_sc", @@ -779,6 +805,7 @@ def __init__( name_q=None, name_w=None, name_ds=None, + latex=False, ): n = USER.check_arrays(y, x, yend, q) @@ -786,6 +813,12 @@ def __init__( USER.check_weights(w, y, w_required=True) x_constant, name_x, warn = USER.check_constant(x, name_x) set_warn(self, warn) + self.title = "GM SPATIALLY WEIGHTED TWO STAGE LEAST SQUARES (HOM)" + if slx_lags > 0: + lag_x = get_lags(w, x_constant[:, 1:], slx_lags) + x_constant = np.hstack((x_constant, lag_x)) + name_x += USER.set_name_spatial_lags(name_x, slx_lags) + self.title += " WITH SLX (SDEM)" BaseGM_Endog_Error_Hom.__init__( self, y=y, @@ -797,7 +830,6 @@ def __init__( max_iter=max_iter, epsilon=epsilon, ) - self.title = "SPATIALLY WEIGHTED TWO STAGE LEAST SQUARES (HOM)" self.name_ds = USER.set_name_ds(name_ds) self.name_y = USER.set_name_y(name_y) self.name_x = USER.set_name_x(name_x, x_constant) @@ -807,7 +839,13 @@ def __init__( self.name_q = USER.set_name_q(name_q, q) self.name_h = USER.set_name_h(self.name_x, self.name_q) self.name_w = USER.set_name_w(name_w, w) - SUMMARY.GM_Endog_Error_Hom(reg=self, w=w, vm=vm) + self.A1 = A1 + self.output = pd.DataFrame(self.name_z, + columns=['var_names']) + self.output['var_type'] = ['x'] * len(self.name_x) + ['yend'] * len(self.name_yend) + ['lambda'] + self.output['regime'], self.output['equation'] = (0, 0) + self.other_top = _summary_iteration(self) + output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) class BaseGM_Combo_Hom(BaseGM_Endog_Error_Hom): @@ -1007,6 +1045,10 @@ class GM_Combo_Hom(BaseGM_Combo_Hom): Orders of W to include as instruments for the spatially lagged dependent variable. For example, w_lags=1, then instruments are WX; if w_lags=2, then WX, WWX; and so on. + slx_lags : integer + Number of spatial lags of X to include in the model specification. + If slx_lags>0, the specification becomes of the General Nesting + Spatial Model (GNSM) type. lag_q : boolean If True, then include spatial lags of the additional instruments (q). 
@@ -1038,9 +1080,13 @@ class GM_Combo_Hom(BaseGM_Combo_Hom): Name of weights matrix for use in output name_ds : string Name of dataset for use in output + latex : boolean + Specifies if summary is to be printed in latex format Attributes ---------- + output : dataframe + regression results pandas dataframe summary : string Summary of regression results and diagnostics (note: use in conjunction with the print command) @@ -1232,6 +1278,7 @@ def __init__( q=None, w=None, w_lags=1, + slx_lags=0, lag_q=True, max_iter=1, epsilon=0.00001, @@ -1243,6 +1290,7 @@ def __init__( name_q=None, name_w=None, name_ds=None, + latex=False, ): n = USER.check_arrays(y, x, yend, q) @@ -1250,7 +1298,11 @@ def __init__( USER.check_weights(w, y, w_required=True) x_constant, name_x, warn = USER.check_constant(x, name_x) set_warn(self, warn) - yend2, q2 = set_endog(y, x_constant[:, 1:], w, yend, q, w_lags, lag_q) + if slx_lags == 0: + yend2, q2 = set_endog(y, x_constant[:, 1:], w, yend, q, w_lags, lag_q) + else: + yend2, q2, wx = set_endog(y, x_constant[:, 1:], w, yend, q, w_lags, lag_q, slx_lags) + x_constant = np.hstack((x_constant, wx)) BaseGM_Combo_Hom.__init__( self, y=y, @@ -1269,7 +1321,10 @@ def __init__( w, self.y, self.predy, yend2[:, -1].reshape(self.n, 1), self.rho ) set_warn(self, warn) - self.title = "SPATIALLY WEIGHTED TWO STAGE LEAST SQUARES (HOM)" + self.title = "SPATIALLY WEIGHTED 2SLS- GM-COMBO MODEL (HOM)" + if slx_lags > 0: + name_x += USER.set_name_spatial_lags(name_x, slx_lags) + self.title += " WITH SLX (GNSM)" self.name_ds = USER.set_name_ds(name_ds) self.name_y = USER.set_name_y(name_y) self.name_x = USER.set_name_x(name_x, x_constant) @@ -1281,8 +1336,14 @@ def __init__( self.name_q.extend(USER.set_name_q_sp(self.name_x, w_lags, self.name_q, lag_q)) self.name_h = USER.set_name_h(self.name_x, self.name_q) self.name_w = USER.set_name_w(name_w, w) - SUMMARY.GM_Combo_Hom(reg=self, w=w, vm=vm) - + self.A1 = A1 + self.output = pd.DataFrame(self.name_z, + columns=['var_names']) + self.output['var_type'] = ['x'] * len(self.name_x) + ['yend'] * (len(self.name_yend) - 1) + ['rho', 'lambda'] + self.output['regime'], self.output['equation'] = (0, 0) + self.other_top = _spat_pseudo_r2(self) + self.other_top += _summary_iteration(self) + output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) # Functions @@ -1514,5 +1575,31 @@ def _test(): if __name__ == "__main__": - _test() + + import numpy as np + import libpysal + + db = libpysal.io.open(libpysal.examples.get_path('columbus.dbf'),'r') + y = np.array(db.by_col("HOVAL")) + y = np.reshape(y, (49,1)) + X = [] + X.append(db.by_col("INC")) + X = np.array(X).T + yd = [] + yd.append(db.by_col("CRIME")) + yd = np.array(yd).T + q = [] + q.append(db.by_col("DISCBD")) + q = np.array(q).T + + w = libpysal.weights.Rook.from_shapefile(libpysal.examples.get_path("columbus.shp")) + w.transform = 'r' + #reg = GM_Error_Hom(y, X, w=w, name_x=['inc'], name_y='hoval', name_ds='columbus', vm=True) + #reg = GM_Endog_Error_Hom(y, X, yd, q, w=w, name_x=['inc'], name_y='hoval', name_yend=['crime'], + # name_q=['discbd'], name_ds='columbus',vm=True) + reg = GM_Combo_Hom(y, X, yd, q, w=w, name_x=['inc'], name_y='hoval', name_yend=['crime'], name_q=['discbd'], + name_ds='columbus', vm=True) + + print(reg.output) + print(reg.summary) \ No newline at end of file diff --git a/spreg/error_sp_hom_regimes.py b/spreg/error_sp_hom_regimes.py index 9812584e..f8d7dfae 100644 --- a/spreg/error_sp_hom_regimes.py +++ b/spreg/error_sp_hom_regimes.py @@ -26,9 +26,9 @@ ) from . 
import regimes as REGI from . import user_output as USER -from . import summary_output as SUMMARY from platform import system - +import pandas as pd +from .output import output, _summary_iteration, _spat_pseudo_r2 class GM_Error_Hom_Regimes(RegressionPropsY, REGI.Regimes_Frame): @@ -67,6 +67,9 @@ class GM_Error_Hom_Regimes(RegressionPropsY, REGI.Regimes_Frame): If True, a separate regression is run for each regime. regime_lag_sep: boolean Always False, kept for consistency, ignored. + slx_lags : integer + Number of spatial lags of X to include in the model specification. + If slx_lags>0, the specification becomes of the SDEM type. max_iter : int Maximum number of iterations of steps 2a and 2b from :cite:`Arraiz2010`. Note: epsilon provides an additional @@ -97,9 +100,13 @@ class GM_Error_Hom_Regimes(RegressionPropsY, REGI.Regimes_Frame): Name of dataset for use in output name_regimes : string Name of regime variable for use in the output + latex : boolean + Specifies if summary is to be printed in latex format Attributes ---------- + output : dataframe + regression results pandas dataframe summary : string Summary of regression results and diagnostics (note: use in conjunction with the print command) @@ -317,12 +324,14 @@ def __init__( cols2regi="all", regime_err_sep=False, regime_lag_sep=False, + slx_lags=0, vm=False, name_y=None, name_x=None, name_w=None, name_ds=None, name_regimes=None, + latex=False, ): n = USER.check_arrays(y, x) @@ -341,6 +350,12 @@ def __init__( x_constant, name_x, warn = USER.check_constant(x, name_x, just_rem=True) set_warn(self, warn) name_x = USER.set_name_x(name_x, x_constant, constant=True) + + if slx_lags >0: + lag_x = get_lags(w, x_constant, slx_lags) + x_constant = np.hstack((x_constant, lag_x)) + name_x += USER.set_name_spatial_lags(name_x, slx_lags) + self.name_x_r = USER.set_name_x(name_x, x_constant) cols2regi = REGI.check_cols2regi(constant_regi, cols2regi, x_constant) @@ -351,11 +366,12 @@ def __init__( if regime_err_sep == True: if set(cols2regi) == set([True]): - self._error_regimes_multi( + self._error_hom_regimes_multi( y, x_constant, regimes, w, + slx_lags, cores, max_iter, epsilon, @@ -363,10 +379,11 @@ def __init__( cols2regi, vm, name_x, + latex, ) else: raise Exception( - "All coefficients must vary accross regimes if regime_err_sep = True." + "All coefficients must vary across regimes if regime_err_sep = True." ) else: x_constant = sphstack(np.ones((x_constant.shape[0], 1)), x_constant) @@ -381,13 +398,14 @@ def __init__( wA2 = get_A2_hom(w.sparse) # 1a. 
OLS --> \tilde{\delta} - self.x, self.name_x = REGI.Regimes_Frame.__init__( + self.x, self.name_x, x_rlist = REGI.Regimes_Frame.__init__( self, x_constant, regimes, constant_regi=None, cols2regi=cols2regi, names=name_x, + rlist=True, ) ols = BaseOLS(y=y, x=self.x) self.k = ols.x.shape[1] @@ -425,16 +443,25 @@ def __init__( w.sparse, wA1, wA2, self, lambda2, moments[0] ) self.e_filtered = self.u - lambda2 * lag_spatial(w, self.u) - self.title = "SPATIALLY WEIGHTED LEAST SQUARES (HOM) - REGIMES" + if slx_lags == 0: + self.title = "GM SPATIALLY WEIGHTED MODEL (HOM) - REGIMES" + else: + self.title = "GM SPATIALLY WEIGHTED MODEL + SLX (SDEM-HOM) - REGIMES" self.name_x.append("lambda") self.kf += 1 self.chow = REGI.Chow(self) self._cache = {} - SUMMARY.GM_Error_Hom(reg=self, w=w, vm=vm, regimes=True) - - def _error_regimes_multi( - self, y, x, regimes, w, cores, max_iter, epsilon, A1, cols2regi, vm, name_x - ): + self.A1 = A1 + self.output = pd.DataFrame(self.name_x, + columns=['var_names']) + self.output['var_type'] = ['x']*(len(self.name_x)-1)+['lambda'] + self.output['regime'] = x_rlist + ['_Global'] + self.output['equation'] = 0 + self.other_top = _summary_iteration(self) + output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) + + def _error_hom_regimes_multi( + self, y, x, regimes, w, slx_lags, cores, max_iter, epsilon, A1, cols2regi, vm, name_x, latex): regi_ids = dict( (r, list(np.where(np.array(regimes) == r)[0])) for r in self.regimes_set @@ -471,6 +498,7 @@ def _error_regimes_multi( name_x + ["lambda"], self.name_w, self.name_regimes, + slx_lags, ), ) else: @@ -489,6 +517,7 @@ def _error_regimes_multi( name_x + ["lambda"], self.name_w, self.name_regimes, + slx_lags, ) ) @@ -513,6 +542,7 @@ def _error_regimes_multi( results = {} counter = 0 + self.output = pd.DataFrame(columns=['var_names', 'var_type', 'regime', 'equation']) for r in self.regimes_set: """ if is_win: @@ -543,10 +573,16 @@ def _error_regimes_multi( ] = results[r].e_filtered self.name_y += results[r].name_y self.name_x += results[r].name_x + results[r].A1 = A1 + results[r].other_top = _summary_iteration(results[r]) + self.output = pd.concat([self.output, pd.DataFrame({'var_names': results[r].name_x, + 'var_type': ['x'] * (len(results[r].name_x) - 1) + + ['lambda'], + 'regime': r, 'equation': r})], ignore_index=True) counter += 1 self.chow = REGI.Chow(self) self.multi = results - SUMMARY.GM_Error_Hom_multi(reg=self, multireg=self.multi, vm=vm, regimes=True) + output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) class GM_Endog_Error_Hom_Regimes(RegressionPropsY, REGI.Regimes_Frame): @@ -594,6 +630,9 @@ class GM_Endog_Error_Hom_Regimes(RegressionPropsY, REGI.Regimes_Frame): If True, a separate regression is run for each regime. regime_lag_sep : boolean Always False, kept for consistency, ignored. + slx_lags : integer + Number of spatial lags of X to include in the model specification. + If slx_lags>0, the specification becomes of the SDEM type. max_iter : int Maximum number of iterations of steps 2a and 2b from :cite:`Arraiz2010`. Note: epsilon provides an additional stop condition. 
@@ -624,9 +663,13 @@ class GM_Endog_Error_Hom_Regimes(RegressionPropsY, REGI.Regimes_Frame): Name of dataset for use in output name_regimes : string Name of regime variable for use in the output + latex : boolean + Specifies if summary is to be printed in latex format Attributes ---------- + output : dataframe + regression results pandas dataframe summary : string Summary of regression results and diagnostics (note: use in conjunction with the print command) @@ -884,6 +927,7 @@ def __init__( cols2regi="all", regime_err_sep=False, regime_lag_sep=False, + slx_lags=0, max_iter=1, epsilon=0.00001, A1="het", @@ -898,6 +942,7 @@ def __init__( name_regimes=None, summ=True, add_lag=False, + latex=False, ): n = USER.check_arrays(y, x, yend, q) @@ -914,6 +959,10 @@ def __init__( x_constant, name_x, warn = USER.check_constant(x, name_x, just_rem=True) set_warn(self, warn) name_x = USER.set_name_x(name_x, x_constant, constant=True) + if slx_lags > 0: + lag_x = get_lags(w, x_constant, slx_lags) + x_constant = np.hstack((x_constant, lag_x)) + name_x += USER.set_name_spatial_lags(name_x, slx_lags) if summ: name_yend = USER.set_name_yend(name_yend, yend) @@ -931,13 +980,14 @@ def __init__( if regime_err_sep == True: if set(cols2regi) == set([True]): - self._endog_error_regimes_multi( + self._endog_error_hom_regimes_multi( y, x_constant, regimes, w, yend, q, + slx_lags, cores, max_iter, epsilon, @@ -948,10 +998,11 @@ def __init__( name_yend, name_q, add_lag, + latex, ) else: raise Exception( - "All coefficients must vary accross regimes if regime_err_sep = True." + "All coefficients must vary across regimes if regime_err_sep = True." ) else: x_constant = sphstack(np.ones((x_constant.shape[0], 1)), x_constant) @@ -959,15 +1010,16 @@ def __init__( q, name_q = REGI.Regimes_Frame.__init__( self, q, regimes, constant_regi=None, cols2regi="all", names=name_q ) - x, name_x = REGI.Regimes_Frame.__init__( + x, name_x, x_rlist = REGI.Regimes_Frame.__init__( self, x_constant, regimes, constant_regi=None, cols2regi=cols2regi, names=name_x, + rlist=True, ) - yend2, name_yend = REGI.Regimes_Frame.__init__( + yend2, name_yend, yend_rlist = REGI.Regimes_Frame.__init__( self, yend, regimes, @@ -975,6 +1027,7 @@ def __init__( cols2regi=cols2regi, yend=True, names=name_yend, + rlist=True, ) if A1 == "hom": wA1 = get_A1_hom(w.sparse) @@ -1042,13 +1095,21 @@ def __init__( self.kf += 1 self.chow = REGI.Chow(self) self._cache = {} + self.A1 = A1 + self.output = pd.DataFrame(self.name_z, + columns=['var_names']) + self.output['var_type'] = ['x'] * len(self.name_x) + ['yend'] * len(self.name_yend) + ['lambda'] + self.output['regime'] = x_rlist + yend_rlist + ['_Global'] + self.output['equation'] = 0 if summ: - self.title = ( - "SPATIALLY WEIGHTED TWO STAGE LEAST SQUARES (HOM) - REGIMES" - ) - SUMMARY.GM_Endog_Error_Hom(reg=self, w=w, vm=vm, regimes=True) - - def _endog_error_regimes_multi( + self.other_top = _summary_iteration(self) + if slx_lags == 0: + self.title = ("GM SPATIALLY WEIGHTED 2SLS (HOM) - REGIMES") + else: + self.title = ("GM SPATIALLY WEIGHTED 2SLS WITH SLX (SDEM-HOM) - REGIMES") + output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) + + def _endog_error_hom_regimes_multi( self, y, x, @@ -1056,6 +1117,7 @@ def _endog_error_regimes_multi( w, yend, q, + slx_lags, cores, max_iter, epsilon, @@ -1066,6 +1128,7 @@ def _endog_error_regimes_multi( name_yend, name_q, add_lag, + latex, ): regi_ids = dict( @@ -1113,6 +1176,7 @@ def _endog_error_regimes_multi( self.name_w, self.name_regimes, add_lag, + slx_lags, 
), ) else: @@ -1136,6 +1200,7 @@ def _endog_error_regimes_multi( self.name_w, self.name_regimes, add_lag, + slx_lags, ) ) @@ -1166,6 +1231,7 @@ def _endog_error_regimes_multi( self.name_h, ) = ([], [], [], [], [], []) counter = 0 + self.output = pd.DataFrame(columns=['var_names', 'var_type', 'regime', 'equation']) for r in self.regimes_set: """ if is_win: @@ -1207,17 +1273,20 @@ def _endog_error_regimes_multi( self.e_pred[ regi_ids[r], ] = results[r].e_pred + results[r].other_top = _spat_pseudo_r2(results[r]) + v_type = ['x'] * len(results[r].name_x) + ['yend'] * (len(results[r].name_yend)-1) + ['rho','lambda'] + else: + results[r].other_top = "" + v_type = ['x'] * len(results[r].name_x) + ['yend'] * len(results[r].name_yend) + ['lambda'] + results[r].A1 = A1 + results[r].other_top += _summary_iteration(results[r]) + self.output = pd.concat([self.output, pd.DataFrame({'var_names': results[r].name_z, + 'var_type': v_type, + 'regime': r, 'equation': r})], ignore_index=True) counter += 1 self.chow = REGI.Chow(self) self.multi = results - if add_lag != False: - SUMMARY.GM_Combo_Hom_multi( - reg=self, multireg=self.multi, vm=vm, regimes=True - ) - else: - SUMMARY.GM_Endog_Error_Hom_multi( - reg=self, multireg=self.multi, vm=vm, regimes=True - ) + output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) class GM_Combo_Hom_Regimes(GM_Endog_Error_Hom_Regimes): @@ -1267,6 +1336,9 @@ class GM_Combo_Hom_Regimes(GM_Endog_Error_Hom_Regimes): If True, the spatial parameter for spatial lag is also computed according to different regimes. If False (default), the spatial parameter is fixed across regimes. + slx_lags : integer + Number of spatial lags of X to include in the model specification. + If slx_lags>0, the specification becomes of the GNSM type. w_lags : integer Orders of W to include as instruments for the spatially lagged dependent variable. 
For example, w_lags=1, then @@ -1307,9 +1379,13 @@ class GM_Combo_Hom_Regimes(GM_Endog_Error_Hom_Regimes): Name of dataset for use in output name_regimes : string Name of regime variable for use in the output + latex : boolean + Specifies if summary is to be printed in latex format Attributes ---------- + output : dataframe + regression results pandas dataframe summary : string Summary of regression results and diagnostics (note: use in conjunction with the print command) @@ -1593,6 +1669,7 @@ def __init__( yend=None, q=None, w=None, + slx_lags=0, w_lags=1, lag_q=True, cores=False, @@ -1611,22 +1688,42 @@ def __init__( name_w=None, name_ds=None, name_regimes=None, + latex=False, ): + if regime_lag_sep and not regime_err_sep: + set_warn(self, "regime_err_sep set to True when regime_lag_sep=True.") + regime_err_sep = True + if regime_err_sep and not regime_lag_sep: + set_warn(self, "regime_err_sep set to False when regime_lag_sep=False.") + regime_err_sep = False n = USER.check_arrays(y, x) y = USER.check_y(y, n) USER.check_weights(w, y, w_required=True) x_constant, name_x, warn = USER.check_constant(x, name_x, just_rem=True) set_warn(self, warn) name_x = USER.set_name_x(name_x, x_constant, constant=True) + self.name_y = USER.set_name_y(name_y) name_yend = USER.set_name_yend(name_yend, yend) name_q = USER.set_name_q(name_q, q) - name_q.extend(USER.set_name_q_sp(name_x, w_lags, name_q, lag_q, force_all=True)) - cols2regi = REGI.check_cols2regi( - constant_regi, cols2regi, x_constant, yend=yend, add_cons=False - ) + if regime_err_sep and any(col != True for col in cols2regi): + set_warn(self, "All coefficients must vary across regimes if regime_err_sep = True, so setting cols2regi = 'all'.") + cols2regi = "all" + + if slx_lags > 0: + yend2, q2, wx = set_endog(y, x_constant, w, yend, q, w_lags, lag_q, slx_lags) + x_constant = np.hstack((x_constant, wx)) + name_slx = USER.set_name_spatial_lags(name_x, slx_lags) + name_q.extend(USER.set_name_q_sp(name_slx[-len(name_x):], w_lags, name_q, lag_q, force_all=True)) + name_x += name_slx + cols2regi = REGI.check_cols2regi(constant_regi, cols2regi, x_constant[:, :-1], yend=yend2, add_cons=False) + else: + name_q.extend(USER.set_name_q_sp(name_x, w_lags, name_q, lag_q, force_all=True)) + yend2, q2 = yend, q + cols2regi = REGI.check_cols2regi(constant_regi, cols2regi, x_constant, yend=yend2, add_cons=False) + self.regimes_set = REGI._get_regimes_set(regimes) self.regimes = regimes USER.check_regimes(self.regimes_set, n, x_constant.shape[1]) @@ -1634,27 +1731,26 @@ def __init__( self.regime_lag_sep = regime_lag_sep if regime_lag_sep == True: - if regime_err_sep == False: - raise Exception( - "For spatial combo models, if spatial lag is set by regimes (regime_lag_sep=True), spatial error must also be set by regimes (regime_err_sep=True)." - ) - add_lag = [w_lags, lag_q] + if slx_lags == 0: + add_lag = [w_lags, lag_q] + else: + add_lag = False + cols2regi += [True] else: - cols2regi += [False] add_lag = False - if regime_err_sep == True: - raise Exception( - "For spatial combo models, if spatial error is set by regimes (regime_err_sep=True), all coefficients including lambda (regime_lag_sep=True) must be set by regimes." 
- ) - yend, q = set_endog(y, x_constant, w, yend, q, w_lags, lag_q) + cols2regi += [False] + if slx_lags == 0: + yend2, q2 = set_endog(y, x_constant, w, yend2, q2, w_lags, lag_q) + + name_yend.append(USER.set_name_yend_sp(self.name_y)) GM_Endog_Error_Hom_Regimes.__init__( self, y=y, x=x_constant, - yend=yend, - q=q, + yend=yend2, + q=q2, regimes=regimes, w=w, vm=vm, @@ -1674,17 +1770,24 @@ def __init__( name_regimes=name_regimes, summ=False, add_lag=add_lag, + latex=latex, ) if regime_err_sep != True: self.rho = self.betas[-2] self.predy_e, self.e_pred, warn = sp_att( - w, self.y, self.predy, yend[:, -1].reshape(self.n, 1), self.rho + w, self.y, self.predy, yend2[:, -1].reshape(self.n, 1), self.rho ) set_warn(self, warn) self.regime_lag_sep = regime_lag_sep - self.title = "SPATIALLY WEIGHTED TWO STAGE LEAST SQUARES (HOM) - REGIMES" - SUMMARY.GM_Combo_Hom(reg=self, w=w, vm=vm, regimes=True) + if slx_lags == 0: + self.title = "GM SPATIALLY WEIGHTED 2SLS-COMBO MODEL (HOM) - REGIMES" + else: + self.title = "GM SPATIALLY WEIGHTED 2SLS-COMBO WITH SLX (GNSM-HOM) - REGIMES" + self.output.iat[-2, self.output.columns.get_loc('var_type')] = 'rho' + self.other_top = _spat_pseudo_r2(self) + self.other_top += _summary_iteration(self) + output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) def _work_error( @@ -1701,6 +1804,7 @@ def _work_error( name_x, name_w, name_regimes, + slx_lags, ): w_r, warn = REGI.w_regime(w, regi_ids[r], r, transform=True) y_r = y[regi_ids[r]] @@ -1710,7 +1814,10 @@ def _work_error( ) set_warn(model, warn) model.w = w_r - model.title = "SPATIALLY WEIGHTED LEAST SQUARES ESTIMATION (HOM) - REGIME %s" % r + if slx_lags == 0: + model.title = "GM SPATIALLY WEIGHTED 2SLS (HOM) - REGIME %s" % r + else: + model.title = "GM SPATIALLY WEIGHTED 2SLS + SLX (SDEM-HOM) - REGIME %s" % r model.name_ds = name_ds model.name_y = "%s_%s" % (str(r), name_y) model.name_x = ["%s_%s" % (str(r), i) for i in name_x] @@ -1738,6 +1845,7 @@ def _work_endog_error( name_w, name_regimes, add_lag, + slx_lags, ): w_r, warn = REGI.w_regime(w, regi_ids[r], r, transform=True) y_r = y[regi_ids[r]] @@ -1762,12 +1870,21 @@ def _work_endog_error( w_r, model.y, model.predy, model.yend[:, -1].reshape(model.n, 1), model.rho ) set_warn(model, warn) - model.title = "SPATIALLY WEIGHTED TWO STAGE LEAST SQUARES (HOM) - REGIME %s" % r + if slx_lags == 0: + if add_lag != False: + model.title = "GM SPATIALLY WEIGHTED 2SLS-COMBO MODEL (HOM)- REGIME %s" % r + else: + model.title = "GM SPATIALLY WEIGHTED 2SLS (HOM) - REGIME %s" % r + else: + if add_lag != False: + model.title = "GM SPATIAL COMBO MODEL + SLX (GNSM-HOM) - REGIME %s" % r + else: + model.title = "GM SPATIALLY WEIGHTED 2SLS + SLX (SDEM-HOM) - REGIME %s" % r model.name_ds = name_ds model.name_y = "%s_%s" % (str(r), name_y) model.name_x = ["%s_%s" % (str(r), i) for i in name_x] model.name_yend = ["%s_%s" % (str(r), i) for i in name_yend] - model.name_z = model.name_x + model.name_yend + ["lambda"] + model.name_z = model.name_x + model.name_yend + [str(r)+"_lambda"] model.name_q = ["%s_%s" % (str(r), i) for i in name_q] model.name_h = model.name_x + model.name_q model.name_w = name_w @@ -1786,3 +1903,33 @@ def _test(): if __name__ == "__main__": _test() + + import numpy as np + import libpysal + + db = libpysal.io.open(libpysal.examples.get_path('columbus.dbf'),'r') + y = np.array(db.by_col("HOVAL")) + y = np.reshape(y, (49,1)) + X = [] + X.append(db.by_col("INC")) + X = np.array(X).T + yd = [] + yd.append(db.by_col("CRIME")) + yd = np.array(yd).T + q = [] + 
q.append(db.by_col("DISCBD")) + q = np.array(q).T + + r_var = 'NSA' + regimes = db.by_col(r_var) + + w = libpysal.weights.Rook.from_shapefile(libpysal.examples.get_path("columbus.shp")) + w.transform = 'r' + #reg = GM_Error_Hom_Regimes(y, X, regimes, w=w, name_x=['inc'], name_y='hoval', name_ds='columbus', vm=True, + # regime_err_sep=True) + #reg = GM_Endog_Error_Hom_Regimes(y, X, yd, q, regimes, w=w, name_x=['inc'], name_y='hoval', name_yend=['crime'], + # name_q=['discbd'], name_ds='columbus',vm=True, regime_err_sep=True) + reg = GM_Combo_Hom_Regimes(y, X, regimes, yd, q, w=w, name_x=['inc'], name_y='hoval', name_yend=['crime'], + name_q=['discbd'], name_ds='columbus', vm=True, regime_err_sep=False, regime_lag_sep=False) + print(reg.output) + print(reg.summary) \ No newline at end of file diff --git a/spreg/error_sp_regimes.py b/spreg/error_sp_regimes.py index b1c1feea..8c62c7d8 100644 --- a/spreg/error_sp_regimes.py +++ b/spreg/error_sp_regimes.py @@ -8,7 +8,6 @@ import multiprocessing as mp from . import regimes as REGI from . import user_output as USER -from . import summary_output as SUMMARY from libpysal.weights.spatial_lag import lag_spatial from .ols import BaseOLS from .twosls import BaseTSLS @@ -17,6 +16,10 @@ from .utils import optim_moments, get_spFilter, get_lags from .utils import spdot, RegressionPropsY from .sputils import sphstack +import pandas as pd +from .output import output, _spat_pseudo_r2 +from .error_sp_het_regimes import GM_Error_Het_Regimes, GM_Endog_Error_Het_Regimes, GM_Combo_Het_Regimes +from .error_sp_hom_regimes import GM_Error_Hom_Regimes, GM_Endog_Error_Hom_Regimes, GM_Combo_Hom_Regimes class GM_Error_Regimes(RegressionPropsY, REGI.Regimes_Frame): @@ -55,6 +58,9 @@ class GM_Error_Regimes(RegressionPropsY, REGI.Regimes_Frame): If True, a separate regression is run for each regime. regime_lag_sep: boolean Always False, kept for consistency, ignored. + slx_lags : integer + Number of spatial lags of X to include in the model specification. + If slx_lags>0, the specification becomes of the SDEM type. vm : boolean If True, include variance-covariance matrix in summary results @@ -72,10 +78,13 @@ class GM_Error_Regimes(RegressionPropsY, REGI.Regimes_Frame): Name of dataset for use in output name_regimes : string Name of regime variable for use in the output - + latex : boolean + Specifies if summary is to be printed in latex format Attributes ---------- + output : dataframe + regression results pandas dataframe summary : string Summary of regression results and diagnostics (note: use in conjunction with the print command) @@ -251,28 +260,15 @@ class GM_Error_Regimes(RegressionPropsY, REGI.Regimes_Frame): values in model.se_betas). Alternatively, we can have a summary of the output by typing: model.summary - >>> print(model.name_x) - ['0_CONSTANT', '0_PS90', '0_UE90', '1_CONSTANT', '1_PS90', '1_UE90', 'lambda'] - >>> np.around(model.betas, decimals=6) - array([[0.074807], - [0.786107], - [0.538849], - [5.103756], - [1.196009], - [0.600533], - [0.364103]]) - >>> np.around(model.std_err, decimals=6) - array([0.379864, 0.152316, 0.051942, 0.471285, 0.19867 , 0.057252]) - >>> np.around(model.z_stat, decimals=6) - array([[ 0.196932, 0.843881], - [ 5.161042, 0. ], - [10.37397 , 0. ], - [10.829455, 0. ], - [ 6.02007 , 0. ], - [10.489215, 0. 
]]) - >>> np.around(model.sig2, decimals=6) - 28.172732 - + >>> print(model.output) + var_names coefficients std_err zt_stat prob + 0 0_CONSTANT 0.074811 0.379864 0.196942 0.843873 + 1 0_PS90 0.786105 0.152315 5.161043 0.0 + 2 0_UE90 0.538848 0.051942 10.373969 0.0 + 3 1_CONSTANT 5.103761 0.471284 10.82949 0.0 + 4 1_PS90 1.196009 0.19867 6.020074 0.0 + 5 1_UE90 0.600532 0.057252 10.489217 0.0 + 6 lambda 0.3641 None None None """ def __init__( @@ -289,17 +285,26 @@ def __init__( cols2regi="all", regime_err_sep=False, regime_lag_sep=False, + slx_lags=0, cores=False, name_ds=None, name_regimes=None, + latex=False, ): n = USER.check_arrays(y, x) y = USER.check_y(y, n) USER.check_weights(w, y, w_required=True) - x_constant, name_x, warn = USER.check_constant(x, name_x, just_rem=True) + x_constant, name_x, warn = USER.check_constant( + x, name_x, just_rem=True) set_warn(self, warn) name_x = USER.set_name_x(name_x, x_constant, constant=True) + + if slx_lags >0: + lag_x = get_lags(w, x_constant, slx_lags) + x_constant = np.hstack((x_constant, lag_x)) + name_x += USER.set_name_spatial_lags(name_x, slx_lags) + self.name_x_r = USER.set_name_x(name_x, x_constant) self.constant_regi = constant_regi @@ -319,22 +324,24 @@ def __init__( if regime_err_sep == True: if set(cols2regi) == set([True]): self._error_regimes_multi( - y, x_constant, regimes, w, cores, cols2regi, vm, name_x + y, x_constant, regimes, w, slx_lags, cores, cols2regi, vm, name_x, latex ) else: raise Exception( - "All coefficients must vary accross regimes if regime_err_sep = True." + "All coefficients must vary across regimes if regime_err_sep = True." ) else: - x_constant = sphstack(np.ones((x_constant.shape[0], 1)), x_constant) + x_constant = sphstack( + np.ones((x_constant.shape[0], 1)), x_constant) name_x = USER.set_name_x(name_x, x_constant) - self.x, self.name_x = REGI.Regimes_Frame.__init__( + self.x, self.name_x, x_rlist = REGI.Regimes_Frame.__init__( self, x_constant, regimes, constant_regi=None, cols2regi=cols2regi, names=name_x, + rlist=True, ) ols = BaseOLS(y=y, x=self.x) self.k = ols.x.shape[1] @@ -354,14 +361,22 @@ def __init__( self.sig2 = ols2.sig2n self.e_filtered = self.u - lambda1 * lag_spatial(w, self.u) self.vm = self.sig2 * ols2.xtxi - self.title = "SPATIALLY WEIGHTED LEAST SQUARES - REGIMES" + if slx_lags == 0: + self.title = "GM SPATIALLY WEIGHTED MODEL - REGIMES" + else: + self.title = "GM SPATIALLY WEIGHTED MODEL + SLX (SDEM) - REGIMES" self.name_x.append("lambda") self.kf += 1 self.chow = REGI.Chow(self) self._cache = {} - SUMMARY.GM_Error(reg=self, w=w, vm=vm, regimes=True) - - def _error_regimes_multi(self, y, x, regimes, w, cores, cols2regi, vm, name_x): + self.output = pd.DataFrame(self.name_x, + columns=['var_names']) + self.output['var_type'] = ['x']*(len(self.name_x)-1)+['lambda'] + self.output['regime'] = x_rlist + ['_Global'] + self.output['equation'] = 0 + output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) + + def _error_regimes_multi(self, y, x, regimes, w, slx_lags, cores, cols2regi, vm, name_x, latex): regi_ids = dict( (r, list(np.where(np.array(regimes) == r)[0])) for r in self.regimes_set ) @@ -394,6 +409,7 @@ def _error_regimes_multi(self, y, x, regimes, w, cores, cols2regi, vm, name_x): name_x + ["lambda"], self.name_w, self.name_regimes, + slx_lags, ), ) else: @@ -409,6 +425,7 @@ def _error_regimes_multi(self, y, x, regimes, w, cores, cols2regi, vm, name_x): name_x + ["lambda"], self.name_w, self.name_regimes, + slx_lags, ) ) @@ -432,6 +449,8 @@ def _error_regimes_multi(self, 
y, x, regimes, w, cores, cols2regi, vm, name_x): results = {} self.name_y, self.name_x = [], [] + self.output = pd.DataFrame( + columns=['var_names', 'var_type', 'regime', 'equation']) counter = 0 for r in self.regimes_set: """ @@ -446,11 +465,11 @@ def _error_regimes_multi(self, y, x, regimes, w, cores, cols2regi, vm, name_x): results[r] = results_p[r].get() self.vm[ - (counter * self.kr) : ((counter + 1) * self.kr), - (counter * self.kr) : ((counter + 1) * self.kr), + (counter * self.kr): ((counter + 1) * self.kr), + (counter * self.kr): ((counter + 1) * self.kr), ] = results[r].vm self.betas[ - (counter * (self.kr + 1)) : ((counter + 1) * (self.kr + 1)), + (counter * (self.kr + 1)): ((counter + 1) * (self.kr + 1)), ] = results[r].betas self.u[ regi_ids[r], @@ -463,10 +482,14 @@ def _error_regimes_multi(self, y, x, regimes, w, cores, cols2regi, vm, name_x): ] = results[r].e_filtered self.name_y += results[r].name_y self.name_x += results[r].name_x + self.output = pd.concat([self.output, pd.DataFrame({'var_names': results[r].name_x, + 'var_type': ['x'] * (len(results[r].name_x) - 1) + + ['lambda'], + 'regime': r, 'equation': r})], ignore_index=True) counter += 1 self.chow = REGI.Chow(self) self.multi = results - SUMMARY.GM_Error_multi(reg=self, multireg=self.multi, vm=vm, regimes=True) + output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) class GM_Endog_Error_Regimes(RegressionPropsY, REGI.Regimes_Frame): @@ -513,6 +536,9 @@ class GM_Endog_Error_Regimes(RegressionPropsY, REGI.Regimes_Frame): If True, a separate regression is run for each regime. regime_lag_sep: boolean Always False, kept for consistency, ignored. + slx_lags : integer + Number of spatial lags of X to include in the model specification. + If slx_lags>0, the specification becomes of the SDEM type. vm : boolean If True, include variance-covariance matrix in summary results @@ -534,9 +560,13 @@ class GM_Endog_Error_Regimes(RegressionPropsY, REGI.Regimes_Frame): Name of dataset for use in output name_regimes : string Name of regime variable for use in the output + latex : boolean + Specifies if summary is to be printed in latex format Attributes ---------- + output : dataframe + regression results pandas dataframe summary : string Summary of regression results and diagnostics (note: use in conjunction with the print command) @@ -742,22 +772,17 @@ class GM_Endog_Error_Regimes(RegressionPropsY, REGI.Regimes_Frame): endogenous variables included. 
Alternatively, we can have a summary of the output by typing: model.summary - >>> print(model.name_z) - ['0_CONSTANT', '0_PS90', '0_UE90', '1_CONSTANT', '1_PS90', '1_UE90', '0_RD90', '1_RD90', 'lambda'] - >>> np.around(model.betas, decimals=5) - array([[ 3.59718], - [ 1.0652 ], - [ 0.15822], - [ 9.19754], - [ 1.88082], - [-0.24878], - [ 2.46161], - [ 3.57943], - [ 0.25564]]) - >>> np.around(model.std_err, decimals=6) - array([0.522633, 0.137555, 0.063054, 0.473654, 0.18335 , 0.072786, - 0.300711, 0.240413]) - + >>> print(model.output) + var_names coefficients std_err zt_stat prob + 0 0_CONSTANT 3.597178 0.522633 6.882796 0.0 + 1 0_PS90 1.065203 0.137555 7.743852 0.0 + 2 0_UE90 0.15822 0.063054 2.509282 0.012098 + 6 0_RD90 2.461609 0.300711 8.185967 0.0 + 3 1_CONSTANT 9.197542 0.473654 19.418268 0.0 + 4 1_PS90 1.880815 0.18335 10.258046 0.0 + 5 1_UE90 -0.248777 0.072786 -3.417919 0.000631 + 7 1_RD90 3.579429 0.240413 14.888666 0.0 + 8 lambda 0.255639 None None None """ def __init__( @@ -774,6 +799,7 @@ def __init__( cols2regi="all", regime_err_sep=False, regime_lag_sep=False, + slx_lags=0, name_y=None, name_x=None, name_yend=None, @@ -783,14 +809,22 @@ def __init__( name_regimes=None, summ=True, add_lag=False, + latex=False, ): n = USER.check_arrays(y, x, yend, q) y = USER.check_y(y, n) USER.check_weights(w, y, w_required=True) - x_constant, name_x, warn = USER.check_constant(x, name_x, just_rem=True) + x_constant, name_x, warn = USER.check_constant( + x, name_x, just_rem=True) set_warn(self, warn) name_x = USER.set_name_x(name_x, x_constant, constant=True) + + if slx_lags > 0: + lag_x = get_lags(w, x_constant, slx_lags) + x_constant = np.hstack((x_constant, lag_x)) + name_x += USER.set_name_spatial_lags(name_x, slx_lags) + self.constant_regi = constant_regi self.cols2regi = cols2regi self.name_ds = USER.set_name_ds(name_ds) @@ -822,6 +856,7 @@ def __init__( w, yend, q, + slx_lags, cores, cols2regi, vm, @@ -829,26 +864,29 @@ def __init__( name_yend, name_q, add_lag, + latex, ) else: raise Exception( - "All coefficients must vary accross regimes if regime_err_sep = True." + "All coefficients must vary across regimes if regime_err_sep = True." 
) else: - x_constant = sphstack(np.ones((x_constant.shape[0], 1)), x_constant) + x_constant = sphstack( + np.ones((x_constant.shape[0], 1)), x_constant) name_x = USER.set_name_x(name_x, x_constant) q, name_q = REGI.Regimes_Frame.__init__( self, q, regimes, constant_regi=None, cols2regi="all", names=name_q ) - x, name_x = REGI.Regimes_Frame.__init__( + x, name_x, x_rlist = REGI.Regimes_Frame.__init__( self, x_constant, regimes, constant_regi=None, cols2regi=cols2regi, names=name_x, + rlist=True, ) - yend2, name_yend = REGI.Regimes_Frame.__init__( + yend2, name_yend, yend_rlist = REGI.Regimes_Frame.__init__( self, yend, regimes, @@ -856,6 +894,7 @@ def __init__( cols2regi=cols2regi, yend=True, names=name_yend, + rlist=True, ) tsls = BaseTSLS(y=y, x=x, yend=yend2, q=q) @@ -896,9 +935,19 @@ def __init__( self.kf += 1 self.chow = REGI.Chow(self) self._cache = {} + self.output = pd.DataFrame(self.name_z, + columns=['var_names']) + self.output['var_type'] = [ + 'x'] * len(self.name_x) + ['yend'] * len(self.name_yend) + ['lambda'] + self.output['regime'] = x_rlist + yend_rlist + ['_Global'] + self.output['equation'] = 0 if summ: - self.title = "SPATIALLY WEIGHTED TWO STAGE LEAST SQUARES - REGIMES" - SUMMARY.GM_Endog_Error(reg=self, w=w, vm=vm, regimes=True) + if slx_lags == 0: + self.title = ("GM SPATIALLY WEIGHTED 2SLS - REGIMES") + else: + self.title = ("GM SPATIALLY WEIGHTED 2SLS WITH SLX (SDEM) - REGIMES") + output(reg=self, vm=vm, robust=False, + other_end=False, latex=latex) def _endog_error_regimes_multi( self, @@ -908,6 +957,7 @@ def _endog_error_regimes_multi( w, yend, q, + slx_lags, cores, cols2regi, vm, @@ -915,6 +965,7 @@ def _endog_error_regimes_multi( name_yend, name_q, add_lag, + latex, ): regi_ids = dict( @@ -959,6 +1010,7 @@ def _endog_error_regimes_multi( self.name_w, self.name_regimes, add_lag, + slx_lags, ), ) else: @@ -979,6 +1031,7 @@ def _endog_error_regimes_multi( self.name_w, self.name_regimes, add_lag, + slx_lags, ) ) @@ -1009,6 +1062,8 @@ def _endog_error_regimes_multi( self.name_h, ) = ([], [], [], [], [], []) counter = 0 + self.output = pd.DataFrame( + columns=['var_names', 'var_type', 'regime', 'equation']) for r in self.regimes_set: """ if is_win: @@ -1022,11 +1077,11 @@ def _endog_error_regimes_multi( results[r] = results_p[r].get() self.vm[ - (counter * self.kr) : ((counter + 1) * self.kr), - (counter * self.kr) : ((counter + 1) * self.kr), + (counter * self.kr): ((counter + 1) * self.kr), + (counter * self.kr): ((counter + 1) * self.kr), ] = results[r].vm self.betas[ - (counter * (self.kr + 1)) : ((counter + 1) * (self.kr + 1)), + (counter * (self.kr + 1)): ((counter + 1) * (self.kr + 1)), ] = results[r].betas self.u[ regi_ids[r], @@ -1050,15 +1105,20 @@ def _endog_error_regimes_multi( self.e_pred[ regi_ids[r], ] = results[r].e_pred + results[r].other_top = _spat_pseudo_r2(results[r]) + v_type = ['x'] * len(results[r].name_x) + ['yend'] * \ + (len(results[r].name_yend) - 1) + ['rho', 'lambda'] + else: + results[r].other_top = "" + v_type = ['x'] * len(results[r].name_x) + ['yend'] * \ + len(results[r].name_yend) + ['lambda'] + self.output = pd.concat([self.output, pd.DataFrame({'var_names': results[r].name_z, + 'var_type': v_type, + 'regime': r, 'equation': r})], ignore_index=True) counter += 1 self.chow = REGI.Chow(self) self.multi = results - if add_lag != False: - SUMMARY.GM_Combo_multi(reg=self, multireg=self.multi, vm=vm, regimes=True) - else: - SUMMARY.GM_Endog_Error_multi( - reg=self, multireg=self.multi, vm=vm, regimes=True - ) + output(reg=self, vm=vm, 
robust=False, other_end=False, latex=latex) class GM_Combo_Regimes(GM_Endog_Error_Regimes, REGI.Regimes_Frame): @@ -1107,6 +1167,9 @@ class GM_Combo_Regimes(GM_Endog_Error_Regimes, REGI.Regimes_Frame): If True, the spatial parameter for spatial lag is also computed according to different regimes. If False (default), the spatial parameter is fixed accross regimes. + slx_lags : integer + Number of spatial lags of X to include in the model specification. + If slx_lags>0, the specification becomes of the GNSM type. w_lags : integer Orders of W to include as instruments for the spatially lagged dependent variable. For example, w_lags=1, then @@ -1135,9 +1198,13 @@ class GM_Combo_Regimes(GM_Endog_Error_Regimes, REGI.Regimes_Frame): Name of dataset for use in output name_regimes : string Name of regime variable for use in the output + latex : boolean + Specifies if summary is to be printed in latex format Attributes ---------- + output : dataframe + regression results pandas dataframe summary : string Summary of regression results and diagnostics (note: use in conjunction with the print command) @@ -1350,22 +1417,16 @@ class GM_Combo_Regimes(GM_Endog_Error_Regimes, REGI.Regimes_Frame): output by typing: model.summary Alternatively, we can check the betas: - >>> print(model.name_z) - ['0_CONSTANT', '0_PS90', '0_UE90', '1_CONSTANT', '1_PS90', '1_UE90', '_Global_W_HR90', 'lambda'] - >>> print(np.around(model.betas,4)) - [[ 1.4607] - [ 0.958 ] - [ 0.5658] - [ 9.113 ] - [ 1.1338] - [ 0.6517] - [-0.4583] - [ 0.6136]] - - And lambda: - - >>> print('lambda: ', np.around(model.betas[-1], 4)) - lambda: [0.6136] + >>> print(model.output) + var_names coefficients std_err zt_stat prob + 0 0_CONSTANT 1.460707 0.704174 2.074356 0.038046 + 1 0_PS90 0.95795 0.171485 5.586214 0.0 + 2 0_UE90 0.565805 0.053665 10.543203 0.0 + 3 1_CONSTANT 9.112998 1.525875 5.972311 0.0 + 4 1_PS90 1.13382 0.20552 5.51683 0.0 + 5 1_UE90 0.65169 0.061106 10.664938 0.0 + 6 _Global_W_HR90 -0.458326 0.145599 -3.147859 0.001645 + 7 lambda 0.613599 None None None This class also allows the user to run a spatial lag+error model with the extra feature of including non-spatial endogenous regressors. 
This means @@ -1383,24 +1444,18 @@ class GM_Combo_Regimes(GM_Endog_Error_Regimes, REGI.Regimes_Frame): And then we can run and explore the model analogously to the previous combo: >>> model = GM_Combo_Regimes(y, x, regimes, yd, q, w=w, name_y=y_var, name_x=x_var, name_yend=yd_var, name_q=q_var, name_regimes=r_var, name_ds='NAT') - >>> print(model.name_z) - ['0_CONSTANT', '0_PS90', '0_UE90', '1_CONSTANT', '1_PS90', '1_UE90', '0_RD90', '1_RD90', '_Global_W_HR90', 'lambda'] - >>> print(model.betas) - [[ 3.41963782] - [ 1.04065841] - [ 0.16634393] - [ 8.86544628] - [ 1.85120528] - [-0.24908469] - [ 2.43014046] - [ 3.61645481] - [ 0.03308671] - [ 0.18684992]] - >>> print(np.sqrt(model.vm.diagonal())) - [0.53067577 0.13271426 0.06058025 0.76406411 0.17969783 0.07167421 - 0.28943121 0.25308326 0.06126529] - >>> print('lambda: ', np.around(model.betas[-1], 4)) - lambda: [0.1868] + >>> print(model.output) + var_names coefficients std_err zt_stat prob + 0 0_CONSTANT 3.419638 0.530676 6.443931 0.0 + 1 0_PS90 1.040658 0.132714 7.841346 0.0 + 2 0_UE90 0.166344 0.06058 2.745844 0.006036 + 6 0_RD90 2.43014 0.289431 8.396263 0.0 + 3 1_CONSTANT 8.865446 0.764064 11.603014 0.0 + 4 1_PS90 1.851205 0.179698 10.301769 0.0 + 5 1_UE90 -0.249085 0.071674 -3.475235 0.00051 + 7 1_RD90 3.616455 0.253083 14.289586 0.0 + 8 _Global_W_HR90 0.033087 0.061265 0.540057 0.589158 + 9 lambda 0.18685 None None None """ def __init__( @@ -1411,6 +1466,7 @@ def __init__( yend=None, q=None, w=None, + slx_lags=0, w_lags=1, lag_q=True, cores=False, @@ -1426,22 +1482,41 @@ def __init__( name_w=None, name_ds=None, name_regimes=None, + latex=False, ): - + if regime_lag_sep and not regime_err_sep: + set_warn(self, "regime_err_sep set to True when regime_lag_sep=True.") + regime_err_sep = True + if regime_err_sep and not regime_lag_sep: + set_warn(self, "regime_err_sep set to False when regime_lag_sep=False.") + regime_err_sep = False n = USER.check_arrays(y, x) y = USER.check_y(y, n) USER.check_weights(w, y, w_required=True) - x_constant, name_x, warn = USER.check_constant(x, name_x, just_rem=True) + x_constant, name_x, warn = USER.check_constant( + x, name_x, just_rem=True) set_warn(self, warn) name_x = USER.set_name_x(name_x, x_constant, constant=True) self.name_y = USER.set_name_y(name_y) name_yend = USER.set_name_yend(name_yend, yend) name_q = USER.set_name_q(name_q, q) - name_q.extend(USER.set_name_q_sp(name_x, w_lags, name_q, lag_q, force_all=True)) - cols2regi = REGI.check_cols2regi( - constant_regi, cols2regi, x_constant, yend=yend, add_cons=False - ) + if regime_err_sep and any(col != True for col in cols2regi): + set_warn(self, "All coefficients must vary across regimes if regime_err_sep = True, so setting cols2regi = 'all'.") + cols2regi = "all" + + if slx_lags > 0: + yend2, q2, wx = set_endog(y, x_constant, w, yend, q, w_lags, lag_q, slx_lags) + x_constant = np.hstack((x_constant, wx)) + name_slx = USER.set_name_spatial_lags(name_x, slx_lags) + name_q.extend(USER.set_name_q_sp(name_slx[-len(name_x):], w_lags, name_q, lag_q, force_all=True)) + name_x += name_slx + cols2regi = REGI.check_cols2regi(constant_regi, cols2regi, x_constant[:, :-1], yend=yend2, add_cons=False) + else: + name_q.extend(USER.set_name_q_sp(name_x, w_lags, name_q, lag_q, force_all=True)) + yend2, q2 = yend, q + cols2regi = REGI.check_cols2regi(constant_regi, cols2regi, x_constant, yend=yend2, add_cons=False) + self.regimes_set = REGI._get_regimes_set(regimes) self.regimes = regimes USER.check_regimes(self.regimes_set, n, x_constant.shape[1]) @@ -1449,27 
+1524,27 @@ def __init__( self.regime_lag_sep = regime_lag_sep if regime_lag_sep == True: - if regime_err_sep == False: - raise Exception( - "For spatial combo models, if spatial lag is set by regimes (regime_lag_sep=True), spatial error must also be set by regimes (regime_err_sep=True)." - ) - add_lag = [w_lags, lag_q] + if slx_lags == 0: + add_lag = [w_lags, lag_q] + else: + add_lag = False + cols2regi += [True] + else: - if regime_err_sep == True: - raise Exception( - "For spatial combo models, if spatial error is set by regimes (regime_err_sep=True), all coefficients including lambda (regime_lag_sep=True) must be set by regimes." - ) - cols2regi += [False] add_lag = False - yend, q = set_endog(y, x_constant, w, yend, q, w_lags, lag_q) + cols2regi += [False] + if slx_lags == 0: + yend2, q2 = set_endog(y, x_constant, w, yend2, q2, w_lags, lag_q) + name_yend.append(USER.set_name_yend_sp(self.name_y)) + print(cols2regi, x_constant.shape[1], yend2.shape[1], name_x, name_yend, name_q) GM_Endog_Error_Regimes.__init__( self, y=y, x=x_constant, - yend=yend, - q=q, + yend=yend2, + q=q2, regimes=regimes, w=w, vm=vm, @@ -1486,17 +1561,399 @@ def __init__( name_regimes=name_regimes, summ=False, add_lag=add_lag, + latex=latex, ) if regime_err_sep != True: self.rho = self.betas[-2] self.predy_e, self.e_pred, warn = sp_att( - w, self.y, self.predy, yend[:, -1].reshape(self.n, 1), self.rho + w, self.y, self.predy, yend2[:, -1].reshape(self.n, 1), self.rho ) set_warn(self, warn) - self.title = "SPATIALLY WEIGHTED TWO STAGE LEAST SQUARES - REGIMES" - SUMMARY.GM_Combo(reg=self, w=w, vm=vm, regimes=True) + if slx_lags == 0: + self.title = "SPATIALLY WEIGHTED 2SLS - GM-COMBO MODEL - REGIMES" + else: + self.title = "SPATIALLY WEIGHTED 2SLS - GM-COMBO WITH SLX (GNSM) - REGIMES" + self.output.iat[-2, + self.output.columns.get_loc('var_type')] = 'rho' + self.other_top = _spat_pseudo_r2(self) + output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) + + +class GMM_Error_Regimes(GM_Error_Regimes, GM_Combo_Regimes, GM_Endog_Error_Regimes, + GM_Error_Het_Regimes, GM_Combo_Het_Regimes, GM_Endog_Error_Het_Regimes, + GM_Error_Hom_Regimes, GM_Combo_Hom_Regimes, GM_Endog_Error_Hom_Regimes + ): + + """ + Wrapper function to call any of the GM methods for a spatial error regimes model available in spreg + + Parameters + ---------- + y : array + nx1 array for dependent variable + x : array + Two dimensional array with n rows and one column for each + independent (exogenous) variable, excluding the constant + regimes : list + List of n values with the mapping of each + observation to a regime. Assumed to be aligned with 'x'. + w : pysal W object + Spatial weights object (always needed) + yend : array + Two dimensional array with n rows and one column for each + endogenous variable (if any) + q : array + Two dimensional array with n rows and one column for each + external exogenous variable to use as instruments (if any) + (note: this should not contain any variables from x) + estimator : string + Choice of estimator to be used. Options are: 'het', which + is robust to heteroskedasticity, 'hom', which assumes + homoskedasticity, and 'kp98', which does not provide + inference on the spatial parameter for the error term. + constant_regi: string, optional + Switcher controlling the constant term setup. It may take + the following values: + + * 'one': a vector of ones is appended to x and held constant across regimes. + + * 'many': a vector of ones is appended to x and considered different per regime (default). 
+ cols2regi : list, 'all' + Argument indicating whether each + column of x should be considered as different per regime + or held constant across regimes (False). + If a list, k booleans indicating for each variable the + option (True if one per regime, False to be held constant). + If 'all' (default), all the variables vary by regime. + regime_err_sep: boolean + If True, a separate regression is run for each regime. + regime_lag_sep: boolean + Always False, kept for consistency, ignored. + add_wy : boolean + If True, then a spatial lag of the dependent variable is included. + slx_lags : integer + Number of spatial lags of X to include in the model specification. + If slx_lags>0, the specification becomes of the SDEM or GNSM type. + vm : boolean + If True, include variance-covariance matrix in summary + results + name_y : string + Name of dependent variable for use in output + name_x : list of strings + Names of independent variables for use in output + name_w : string + Name of weights matrix for use in output + name_regimes : string + Name of regime variable for use in the output + name_yend : list of strings + Names of endogenous variables for use in output + name_q : list of strings + Names of instruments for use in output + name_ds : string + Name of dataset for use in output + latex : boolean + Specifies if summary is to be printed in latex format + **kwargs : keywords + Additional arguments to pass on to the estimators. + See the specific functions for details on what can be used. + + Attributes + ---------- + output : dataframe + regression results pandas dataframe + summary : string + Summary of regression results and diagnostics (note: use in + conjunction with the print command) + betas : array + kx1 array of estimated coefficients + u : array + nx1 array of residuals + e_filtered : array + nx1 array of spatially filtered residuals + predy : array + nx1 array of predicted y values + n : integer + Number of observations + k : integer + Number of variables for which coefficients are estimated + (including the constant) + y : array + nx1 array for dependent variable + x : array + Two dimensional array with n rows and one column for each + independent (exogenous) variable, including the constant + mean_y : float + Mean of dependent variable + std_y : float + Standard deviation of dependent variable + pr2 : float + Pseudo R squared (squared correlation between y and ypred) + vm : array + Variance covariance matrix (kxk) + sig2 : float + Sigma squared used in computations + std_err : array + 1xk array of standard errors of the betas + z_stat : list of tuples + z statistic; each tuple contains the pair (statistic, + p-value), where each is a float + name_y : string + Name of dependent variable for use in output + name_x : list of strings + Names of independent variables for use in output + name_w : string + Name of weights matrix for use in output + name_ds : string + Name of dataset for use in output + name_regimes : string + Name of regime variable for use in the output + title : string + Name of the regression method used + Only available in dictionary 'multi' when multiple regressions + (see 'multi' below for details) + regimes : list + List of n values with the mapping of each + observation to a regime. Assumed to be aligned with 'x'. + constant_regi: string + Ignored if regimes=False. Constant option for regimes. + Switcher controlling the constant term setup. 
It may take + the following values: + * 'one': a vector of ones is appended to x and held constant across regimes + + * 'many': a vector of ones is appended to x and considered different per regime + cols2regi : list, 'all' + Ignored if regimes=False. Argument indicating whether each + column of x should be considered as different per regime + or held constant across regimes (False). + If a list, k booleans indicating for each variable the + option (True if one per regime, False to be held constant). + If 'all', all the variables vary by regime. + regime_err_sep: boolean + If True, a separate regression is run for each regime. + kr : int + Number of variables/columns to be "regimized" or subject + to change by regime. These will result in one parameter + estimate by regime for each variable (i.e. nr parameters per + variable) + kf : int + Number of variables/columns to be considered fixed or + global across regimes and hence only obtain one parameter + estimate + nr : int + Number of different regimes in the 'regimes' list + name_yend : list of strings (optional) + Names of endogenous variables for use in output + name_z : list of strings (optional) + Names of exogenous and endogenous variables for use in + output + name_q : list of strings (optional) + Names of external instruments + name_h : list of strings (optional) + Names of all instruments used in ouput + multi : dictionary + Only available when multiple regressions are estimated, + i.e. when regime_err_sep=True and no variable is fixed + across regimes. + Contains all attributes of each individual regression + + Examples + -------- + + We first need to import the needed modules, namely numpy to convert the + data we read into arrays that ``spreg`` understands and ``libpysal`` to + handle the weights and file management. + + >>> import numpy as np + >>> import libpysal + >>> from libpysal.examples import load_example + + Open data on NCOVR US County Homicides (3085 areas) using libpysal.io.open(). + This is the DBF associated with the NAT shapefile. Note that + libpysal.io.open() also reads data in CSV format; since the actual class + requires data to be passed in as numpy arrays, the user can read their + data in using any method. + + >>> nat = load_example('Natregimes') + >>> db = libpysal.io.open(nat.get_path("natregimes.dbf"),'r') + + Extract the HR90 column (homicide rates in 1990) from the DBF file and make it the + dependent variable for the regression. Note that PySAL requires this to be + an numpy array of shape (n, 1) as opposed to the also common shape of (n, ) + that other packages accept. + + >>> y_var = 'HR90' + >>> y = np.array([db.by_col(y_var)]).reshape(3085,1) + + Extract UE90 (unemployment rate) and PS90 (population structure) vectors from + the DBF to be used as independent variables in the regression. Other variables + can be inserted by adding their names to x_var, such as x_var = ['Var1','Var2','...] + Note that PySAL requires this to be an nxj numpy array, where j is the + number of independent variables (not including a constant). By default + this model adds a vector of ones to the independent variables passed in. + + >>> x_var = ['PS90','UE90'] + >>> x = np.array([db.by_col(name) for name in x_var]).T + + The different regimes in this data are given according to the North and + South dummy (SOUTH). 
+ + >>> r_var = 'SOUTH' + >>> regimes = db.by_col(r_var) + + Since we want to run a spatial error model, we need to specify + the spatial weights matrix that includes the spatial configuration of the + observations. To do that, we can open an already existing gal file or + create a new one. In this case, we will create one from ``NAT.shp``. + + >>> w = libpysal.weights.Rook.from_shapefile(nat.get_path("natregimes.shp")) + + Unless there is a good reason not to do it, the weights have to be + row-standardized so every row of the matrix sums to one. Among other + things, this allows to interpret the spatial lag of a variable as the + average value of the neighboring observations. In PySAL, this can be + easily performed in the following way: + + >>> w.transform = 'r' + + The GMM_Error_Regimes class can run error models and SARAR models, that is a spatial lag+error model. + In this example we will run a simple version of the latter, where we have the + spatial effects as well as exogenous variables. Since it is a spatial + model, we have to pass in the weights matrix. If we want to + have the names of the variables printed in the output summary, we will + have to pass them in as well, although this is optional. + + >>> from spreg import GMM_Error_Regimes + >>> model = GMM_Error_Regimes(y, x, regimes, w=w, add_wy=True, name_y=y_var, name_x=x_var, name_regimes=r_var, name_ds='NAT') + + Once we have run the model, we can explore a little bit the output. The + regression object we have created has many attributes so take your time to + discover them. + + >>> print(model.output) + var_names coefficients std_err zt_stat prob + 0 0_CONSTANT 1.461317 0.848361 1.722517 0.084976 + 1 0_PS90 0.958711 0.239834 3.997388 0.000064 + 2 0_UE90 0.565825 0.063726 8.879088 0.0 + 3 1_CONSTANT 9.115738 1.976874 4.611189 0.000004 + 4 1_PS90 1.132419 0.334107 3.389387 0.0007 + 5 1_UE90 0.651804 0.105518 6.177197 0.0 + 6 _Global_W_HR90 -0.458677 0.180997 -2.534173 0.011271 + 7 lambda 0.734354 0.035255 20.829823 0.0 + + This class also allows the user to run a spatial lag+error model with the + extra feature of including non-spatial endogenous regressors. This means + that, in addition to the spatial lag and error, we consider some of the + variables on the right-hand side of the equation as endogenous and we + instrument for this. In this case we consider RD90 (resource deprivation) + as an endogenous regressor. We use FP89 (families below poverty) + for this and hence put it in the instruments parameter, 'q'. 
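+
+    It is worth noting that ``GMM_Error_Regimes`` is a wrapper class: the example above
+    used the default heteroskedastic estimator (``estimator='het'``), and the same
+    specification could instead be requested with ``estimator='hom'`` or ``estimator='kp98'``,
+    e.g. ``GMM_Error_Regimes(y, x, regimes, w=w, add_wy=True, estimator='hom', ...)``
+    (only a sketch; the remaining keywords follow the call shown above). Returning to the
+    example with an endogenous regressor: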
+ + >>> yd_var = ['RD90'] + >>> yd = np.array([db.by_col(name) for name in yd_var]).T + >>> q_var = ['FP89'] + >>> q = np.array([db.by_col(name) for name in q_var]).T + + And then we can run and explore the model analogously to the previous combo: + + >>> model = GMM_Error_Regimes(y, x, regimes, yend=yd, q=q, w=w, add_wy=True, name_y=y_var, name_x=x_var, name_yend=yd_var, name_q=q_var, name_regimes=r_var, name_ds='NAT') + >>> print(model.output) + var_names coefficients std_err zt_stat prob + 0 0_CONSTANT 1.461317 0.848361 1.722517 0.084976 + 1 0_PS90 0.958711 0.239834 3.997388 0.000064 + 2 0_UE90 0.565825 0.063726 8.879088 0.0 + 3 1_CONSTANT 9.115738 1.976874 4.611189 0.000004 + 4 1_PS90 1.132419 0.334107 3.389387 0.0007 + 5 1_UE90 0.651804 0.105518 6.177197 0.0 + 6 _Global_W_HR90 -0.458677 0.180997 -2.534173 0.011271 + 7 lambda 0.734354 0.035255 20.829823 0.0 + + The class also allows for estimating a GNS model by adding spatial lags of the exogenous variables, using the argument slx_lags: + + >>> model = GMM_Error_Regimes(y, x, regimes, w=w, add_wy=True, slx_lags=1, name_y=y_var, name_x=x_var, name_regimes=r_var, name_ds='NAT') + >>> print(model.output) + var_names coefficients std_err zt_stat prob + 0 0_CONSTANT 0.192699 0.256922 0.75003 0.453237 + 1 0_PS90 1.098019 0.232054 4.731743 0.000002 + 2 0_UE90 0.606622 0.07762 7.815325 0.0 + 3 0_W_PS90 -1.068778 0.203911 -5.241381 0.0 + 4 0_W_UE90 -0.657932 0.176073 -3.73671 0.000186 + 5 1_CONSTANT -0.104299 1.790953 -0.058237 0.95356 + 6 1_PS90 1.219796 0.316425 3.854936 0.000116 + 7 1_UE90 0.678922 0.120491 5.634647 0.0 + 8 1_W_PS90 -1.308599 0.536231 -2.440366 0.014672 + 9 1_W_UE90 -0.708492 0.167057 -4.24102 0.000022 + 10 _Global_W_HR90 1.033956 0.269252 3.840111 0.000123 + 11 lambda -0.384968 0.192256 -2.002366 0.045245 + + + """ + + def __init__( + self, y, x, regimes, w, yend=None, q=None, estimator='het', constant_regi="many", cols2regi="all", regime_err_sep=False, + regime_lag_sep=False, add_wy=False, slx_lags=0, vm=False, name_y=None, name_x=None, name_w=None, name_regimes=None, name_yend=None, + name_q=None, name_ds=None, latex=False, **kwargs): + + if estimator == 'het': + if yend is None and not add_wy: + GM_Error_Het_Regimes.__init__(self, y=y, x=x, regimes=regimes, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, + constant_regi=constant_regi, cols2regi=cols2regi, regime_err_sep=regime_err_sep, + name_w=name_w, name_regimes=name_regimes, name_ds=name_ds, latex=latex, **kwargs) + elif yend is not None and not add_wy: + GM_Endog_Error_Het_Regimes.__init__(self, y=y, x=x, regimes=regimes, yend=yend, q=q, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, + constant_regi=constant_regi, cols2regi=cols2regi, regime_err_sep=regime_err_sep, + name_yend=name_yend, name_q=name_q, name_w=name_w, name_regimes=name_regimes, name_ds=name_ds, latex=latex, **kwargs) + elif add_wy: + GM_Combo_Het_Regimes.__init__(self, y=y, x=x, regimes=regimes, yend=yend, q=q, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, + constant_regi=constant_regi, cols2regi=cols2regi, regime_err_sep=regime_err_sep, regime_lag_sep=regime_lag_sep, + name_yend=name_yend, name_q=name_q, name_w=name_w, name_regimes=name_regimes, name_ds=name_ds, latex=latex, **kwargs) + else: + set_warn(self, 'Combination of arguments passed to GMM_Error_Regimes not allowed. 
Using default arguments instead.') + GM_Error_Het_Regimes.__init__(self, y=y, x=x, regimes=regimes, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, + constant_regi=constant_regi, cols2regi=cols2regi, regime_err_sep=regime_err_sep, + name_w=name_w, name_regimes=name_regimes, name_ds=name_ds, latex=latex) + elif estimator == 'hom': + if yend is None and not add_wy: + GM_Error_Hom_Regimes.__init__(self, y=y, x=x, regimes=regimes, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, + constant_regi=constant_regi, cols2regi=cols2regi, regime_err_sep=regime_err_sep, + name_w=name_w, name_regimes=name_regimes, name_ds=name_ds, latex=latex, **kwargs) + elif yend is not None and not add_wy: + GM_Endog_Error_Hom_Regimes.__init__(self, y=y, x=x, regimes=regimes, yend=yend, q=q, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, + constant_regi=constant_regi, cols2regi=cols2regi, regime_err_sep=regime_err_sep, + name_yend=name_yend, name_q=name_q, name_w=name_w, name_regimes=name_regimes, name_ds=name_ds, latex=latex, **kwargs) + elif add_wy: + GM_Combo_Hom_Regimes.__init__(self, y=y, x=x, regimes=regimes, yend=yend, q=q, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, + constant_regi=constant_regi, cols2regi=cols2regi, regime_err_sep=regime_err_sep, regime_lag_sep=regime_lag_sep, + name_yend=name_yend, name_q=name_q, name_w=name_w, name_regimes=name_regimes, name_ds=name_ds, latex=latex, **kwargs) + else: + set_warn(self, 'Combination of arguments passed to GMM_Error_Regimes not allowed. Using default arguments instead.') + GM_Error_Hom_Regimes.__init__(self, y=y, x=x, regimes=regimes, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, + constant_regi=constant_regi, cols2regi=cols2regi, regime_err_sep=regime_err_sep, + name_w=name_w, name_regimes=name_regimes, name_ds=name_ds, latex=latex) + elif estimator == 'kp98': + if yend is None and not add_wy: + GM_Error_Regimes.__init__(self, y=y, x=x, regimes=regimes, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, + constant_regi=constant_regi, cols2regi=cols2regi, regime_err_sep=regime_err_sep, + name_w=name_w, name_regimes=name_regimes, name_ds=name_ds, latex=latex, **kwargs) + elif yend is not None and not add_wy: + GM_Endog_Error_Regimes.__init__(self, y=y, x=x, regimes=regimes, yend=yend, q=q, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, + constant_regi=constant_regi, cols2regi=cols2regi, regime_err_sep=regime_err_sep, + name_yend=name_yend, name_q=name_q, name_w=name_w, name_regimes=name_regimes, name_ds=name_ds, latex=latex, **kwargs) + elif add_wy: + GM_Combo_Regimes.__init__(self, y=y, x=x, regimes=regimes, yend=yend, q=q, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, + constant_regi=constant_regi, cols2regi=cols2regi, regime_err_sep=regime_err_sep, regime_lag_sep=regime_lag_sep, + name_yend=name_yend, name_q=name_q, name_w=name_w, name_regimes=name_regimes, name_ds=name_ds, latex=latex, **kwargs) + else: + set_warn(self, 'Combination of arguments passed to GMM_Error_Regimes not allowed. Using default arguments instead.') + GM_Error_Regimes.__init__(self, y=y, x=x, regimes=regimes, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, + constant_regi=constant_regi, cols2regi=cols2regi, regime_err_sep=regime_err_sep, + name_w=name_w, name_regimes=name_regimes, name_ds=name_ds, latex=latex) + else: + set_warn(self, 'Combination of arguments passed to GMM_Error_Regimes not allowed. 
Using default arguments instead.') + GM_Error_Het_Regimes.__init__(self, y=y, x=x, regimes=regimes, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, + constant_regi=constant_regi, cols2regi=cols2regi, regime_err_sep=regime_err_sep, + name_w=name_w, name_regimes=name_regimes, name_ds=name_ds, latex=latex) def _work_error(y, x, regi_ids, r, w, name_ds, name_y, name_x, name_w, name_regimes): w_r, warn = REGI.w_regime(w, regi_ids[r], r, transform=True) @@ -1505,7 +1962,7 @@ def _work_error(y, x, regi_ids, r, w, name_ds, name_y, name_x, name_w, name_regi model = BaseGM_Error(y_r, x_r, w_r.sparse) set_warn(model, warn) model.w = w_r - model.title = "SPATIALLY WEIGHTED LEAST SQUARES ESTIMATION - REGIME %s" % r + model.title = "GM SPATIALLY WEIGHTED LEAST SQUARES ESTIMATION - REGIME %s" % r model.name_ds = name_ds model.name_y = "%s_%s" % (str(r), name_y) model.name_x = ["%s_%s" % (str(r), i) for i in name_x] @@ -1530,6 +1987,7 @@ def _work_endog_error( name_w, name_regimes, add_lag, + slx_lags, ): w_r, warn = REGI.w_regime(w, regi_ids[r], r, transform=True) y_r = y[regi_ids[r]] @@ -1548,16 +2006,26 @@ def _work_endog_error( if add_lag != False: model.rho = model.betas[-2] model.predy_e, model.e_pred, warn = sp_att( - w_r, model.y, model.predy, model.yend[:, -1].reshape(model.n, 1), model.rho + w_r, model.y, model.predy, model.yend[:, - + 1].reshape(model.n, 1), model.rho ) set_warn(model, warn) model.w = w_r - model.title = "SPATIALLY WEIGHTED TWO STAGE LEAST SQUARES - REGIME %s" % r + if slx_lags == 0: + if add_lag != False: + model.title = "SPATIALLY WEIGHTED 2SLS - GM-COMBO MODEL - REGIME %s" % r + else: + model.title = "SPATIALLY WEIGHTED 2SLS (GM) - REGIME %s" % r + else: + if add_lag != False: + model.title = "GM SPATIAL COMBO MODEL + SLX (GNSM) - REGIME %s" % r + else: + model.title = "GM SPATIALLY WEIGHTED 2SLS + SLX (SDEM) - REGIME %s" % r model.name_ds = name_ds model.name_y = "%s_%s" % (str(r), name_y) model.name_x = ["%s_%s" % (str(r), i) for i in name_x] model.name_yend = ["%s_%s" % (str(r), i) for i in name_yend] - model.name_z = model.name_x + model.name_yend + ["lambda"] + model.name_z = model.name_x + model.name_yend + [str(r)+"_lambda"] model.name_q = ["%s_%s" % (str(r), i) for i in name_q] model.name_h = model.name_x + model.name_q model.name_w = name_w @@ -1575,31 +2043,34 @@ def _test(): if __name__ == "__main__": - _test() - import libpysal import numpy as np + import libpysal - dbf = libpysal.io.open(libpysal.examples.get_path("columbus.dbf"), "r") - y = np.array([dbf.by_col("CRIME")]).T - names_to_extract = ["INC"] - x = np.array([dbf.by_col(name) for name in names_to_extract]).T - yd_var = ["HOVAL"] - yend = np.array([dbf.by_col(name) for name in yd_var]).T - q_var = ["DISCBD"] - q = np.array([dbf.by_col(name) for name in q_var]).T - regimes = regimes = dbf.by_col("NSA") - w = libpysal.io.open(libpysal.examples.get_path("columbus.gal"), "r").read() - w.transform = "r" - model = GM_Error_Regimes( - y, - x, - regimes=regimes, - w=w, - name_y="crime", - name_x=["income"], - name_regimes="nsa", - name_ds="columbus", - regime_err_sep=True, - ) - print(model.summary) + db = libpysal.io.open(libpysal.examples.get_path('columbus.dbf'), 'r') + y = np.array(db.by_col("HOVAL")) + y = np.reshape(y, (49, 1)) + X = [] + X.append(db.by_col("INC")) + X = np.array(X).T + yd = [] + yd.append(db.by_col("CRIME")) + yd = np.array(yd).T + q = [] + q.append(db.by_col("DISCBD")) + q = np.array(q).T + + r_var = 'NSA' + regimes = db.by_col(r_var) + + w = 
libpysal.weights.Rook.from_shapefile( + libpysal.examples.get_path("columbus.shp")) + w.transform = 'r' + # reg = GM_Error_Regimes(y, X, regimes, w=w, name_x=['inc'], name_y='hoval', name_ds='columbus', + # regime_err_sep=True) + # reg = GM_Endog_Error_Regimes(y, X, yd, q, regimes, w=w, name_x=['inc'], name_y='hoval', name_yend=['crime'], + # name_q=['discbd'], name_ds='columbus', regime_err_sep=True) + reg = GM_Combo_Regimes(y, X, regimes, yd, q, w=w, name_x=['inc'], name_y='hoval', name_yend=['crime'], + name_q=['discbd'], name_ds='columbus', regime_err_sep=True, regime_lag_sep=True) + print(reg.output) + print(reg.summary) diff --git a/spreg/ml_error.py b/spreg/ml_error.py index 2d505324..bb338677 100644 --- a/spreg/ml_error.py +++ b/spreg/ml_error.py @@ -10,12 +10,13 @@ import numpy.linalg as la from scipy import sparse as sp from scipy.sparse.linalg import splu as SuperLU -from .utils import RegressionPropsY, RegressionPropsVM, set_warn +from .utils import RegressionPropsY, RegressionPropsVM, set_warn, get_lags from . import diagnostics as DIAG from . import user_output as USER -from . import summary_output as SUMMARY from . import regimes as REGI from .w_utils import symmetrize +import pandas as pd +from .output import output, _nonspat_top try: from scipy.optimize import minimize_scalar @@ -310,6 +311,9 @@ class ML_Error(BaseML_Error): independent (exogenous) variable, excluding the constant w : Sparse matrix Spatial weights sparse matrix + slx_lags : integer + Number of spatial lags of X to include in the model specification. + If slx_lags>0, the specification becomes of the SDEM type. method : string if 'full', brute force calculation (full matrix expressions) if 'ord', Ord eigenvalue method @@ -327,9 +331,13 @@ class ML_Error(BaseML_Error): Name of weights matrix for use in output name_ds : string Name of dataset for use in output + latex : boolean + Specifies if summary is to be printed in latex format Attributes ---------- + output : dataframe + regression results pandas dataframe betas : array (k+1)x1 array of estimated coefficients (rho first) lam : float @@ -463,6 +471,7 @@ def __init__( y, x, w, + slx_lags=0, method="full", epsilon=0.0000001, vm=False, @@ -470,17 +479,26 @@ def __init__( name_x=None, name_w=None, name_ds=None, + latex=False, ): n = USER.check_arrays(y, x) y = USER.check_y(y, n) USER.check_weights(w, y, w_required=True) x_constant, name_x, warn = USER.check_constant(x, name_x) set_warn(self, warn) + + self.title = "ML SPATIAL ERROR" + if slx_lags >0: + lag_x = get_lags(w, x_constant[:, 1:], slx_lags) + x_constant = np.hstack((x_constant, lag_x)) + name_x += USER.set_name_spatial_lags(name_x, slx_lags) + self.title += " WITH SLX (SDEM)" + self.title += " (METHOD = " + method + ")" + method = method.upper() BaseML_Error.__init__( self, y=y, x=x_constant, w=w, method=method, epsilon=epsilon ) - self.title = "MAXIMUM LIKELIHOOD SPATIAL ERROR" + " (METHOD = " + method + ")" self.name_ds = USER.set_name_ds(name_ds) self.name_y = USER.set_name_y(name_y) self.name_x = USER.set_name_x(name_x, x) @@ -488,7 +506,11 @@ def __init__( self.name_w = USER.set_name_w(name_w, w) self.aic = DIAG.akaike(reg=self) self.schwarz = DIAG.schwarz(reg=self) - SUMMARY.ML_Error(reg=self, w=w, vm=vm, spat_diag=False) + self.output = pd.DataFrame(self.name_x, columns=['var_names']) + self.output['var_type'] = ['x'] * (len(self.name_x) - 1) + ['lambda'] + self.output['regime'], self.output['equation'] = (0, 0) + self.other_top = _nonspat_top(self, ml=True) + output(reg=self, vm=vm, 
robust=False, other_end=False, latex=latex) def err_c_loglik(lam, n, y, ylag, x, xlag, W): @@ -565,3 +587,29 @@ def _test(): np.set_printoptions(suppress=True) doctest.testmod() np.set_printoptions(suppress=start_suppress) + +if __name__ == "__main__": + _test() + + import numpy as np + import libpysal + + db = libpysal.io.open(libpysal.examples.get_path("columbus.dbf"), "r") + y_var = "CRIME" + y = np.array([db.by_col(y_var)]).reshape(49, 1) + x_var = ["INC"] + x = np.array([db.by_col(name) for name in x_var]).T + w = libpysal.weights.Rook.from_shapefile(libpysal.examples.get_path("columbus.shp")) + w.transform = "r" + model = ML_Error( + y, + x, + w=w, + vm=False, + name_y=y_var, + name_x=x_var, + name_ds="columbus", + name_w="columbus.gal", + ) + print(model.output) + print(model.summary) diff --git a/spreg/ml_error_regimes.py b/spreg/ml_error_regimes.py index 03384495..721fd115 100644 --- a/spreg/ml_error_regimes.py +++ b/spreg/ml_error_regimes.py @@ -9,12 +9,13 @@ import multiprocessing as mp from . import regimes as REGI from . import user_output as USER -from . import summary_output as SUMMARY from . import diagnostics as DIAG -from .utils import set_warn +from .utils import set_warn, get_lags from .sputils import sphstack from .ml_error import BaseML_Error from platform import system +import pandas as pd +from .output import output, _nonspat_top __all__ = ["ML_Error_Regimes"] @@ -51,6 +52,9 @@ class ML_Error_Regimes(BaseML_Error, REGI.Regimes_Frame): If 'all' (default), all the variables vary by regime. w : Sparse matrix Spatial weights sparse matrix + slx_lags : integer + Number of spatial lags of X to include in the model specification. + If slx_lags>0, the specification becomes of the SDEM type. method : string if 'full', brute force calculation (full matrix expressions) if 'ord', Ord eigenvalue computation @@ -78,9 +82,13 @@ class ML_Error_Regimes(BaseML_Error, REGI.Regimes_Frame): Name of dataset for use in output name_regimes : string Name of regimes variable for use in output + latex : boolean + Specifies if summary is to be printed in latex format Attributes ---------- + output : dataframe + regression results pandas dataframe summary : string Summary of regression results and diagnostics (note: use in conjunction with the print command) @@ -276,6 +284,7 @@ def __init__( x, regimes, w=None, + slx_lags=0, constant_regi="many", cols2regi="all", method="full", @@ -289,14 +298,13 @@ def __init__( name_w=None, name_ds=None, name_regimes=None, + latex=False, ): n = USER.check_arrays(y, x) y = USER.check_y(y, n) USER.check_weights(w, y, w_required=True) self.constant_regi = constant_regi - self.cols2regi = cols2regi - self.regime_err_sep = regime_err_sep self.name_ds = USER.set_name_ds(name_ds) self.name_y = USER.set_name_y(name_y) self.name_w = USER.set_name_w(name_w, w) @@ -307,9 +315,16 @@ def __init__( x_constant, name_x, warn = USER.check_constant(x, name_x, just_rem=True) set_warn(self, warn) name_x = USER.set_name_x(name_x, x_constant, constant=True) + + if slx_lags >0: + lag_x = get_lags(w, x_constant, slx_lags) + x_constant = np.hstack((x_constant, lag_x)) + name_x += USER.set_name_spatial_lags(name_x, slx_lags) + self.name_x_r = USER.set_name_x(name_x, x_constant) - cols2regi = REGI.check_cols2regi(constant_regi, cols2regi, x) + cols2regi = REGI.check_cols2regi(constant_regi, cols2regi, x_constant) + self.cols2regi = cols2regi self.regimes_set = REGI._get_regimes_set(regimes) self.regimes = regimes USER.check_regimes(self.regimes_set, self.n, x.shape[1]) @@ -322,16 
+337,18 @@ def __init__( x_constant, regimes, w, + slx_lags, cores, method, epsilon, cols2regi, vm, name_x, + latex, ) else: raise Exception( - "All coefficients must vary accross regimes if regime_err_sep = True." + "All coefficients must vary across regimes if regime_err_sep = True." ) else: x_constant = sphstack(np.ones((x_constant.shape[0], 1)), x_constant) @@ -340,15 +357,15 @@ def __init__( regimes_att["x"] = x_constant regimes_att["regimes"] = regimes regimes_att["cols2regi"] = cols2regi - x, name_x = REGI.Regimes_Frame.__init__( + x, name_x, x_rlist = REGI.Regimes_Frame.__init__( self, x_constant, regimes, constant_regi=None, cols2regi=cols2regi, names=name_x, + rlist=True ) - BaseML_Error.__init__( self, y=y, @@ -359,22 +376,26 @@ def __init__( regimes_att=regimes_att, ) - self.title = ( - "MAXIMUM LIKELIHOOD SPATIAL ERROR - REGIMES" - + " (METHOD = " - + method - + ")" - ) + self.title = "ML SPATIAL ERROR" + if slx_lags >0: + self.title += " WITH SLX (SDEM)" + self.title += " - REGIMES (METHOD = " + method + ")" + self.name_x = USER.set_name_x(name_x, x, constant=True) self.name_x.append("lambda") self.kf += 1 # Adding a fixed k to account for lambda. self.chow = REGI.Chow(self) self.aic = DIAG.akaike(reg=self) self.schwarz = DIAG.schwarz(reg=self) - SUMMARY.ML_Error(reg=self, w=w, vm=vm, spat_diag=False, regimes=True) + self.output = pd.DataFrame(self.name_x, columns=['var_names']) + self.output['var_type'] = ['x'] * (len(self.name_x) - 1) + ['lambda'] + self.output['regime'] = x_rlist + ['_Global'] + self.output['equation'] = 0 + self.other_top = _nonspat_top(self, ml=True) + output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) def _error_regimes_multi( - self, y, x, regimes, w, cores, method, epsilon, cols2regi, vm, name_x + self, y, x, regimes, w, slx_lags, cores, method, epsilon, cols2regi, vm, name_x, latex ): regi_ids = dict( @@ -404,6 +425,7 @@ def _error_regimes_multi( regi_ids, r, w, + slx_lags, method, epsilon, self.name_ds, @@ -421,6 +443,7 @@ def _error_regimes_multi( regi_ids, r, w, + slx_lags, method, epsilon, self.name_ds, @@ -452,6 +475,7 @@ def _error_regimes_multi( results = {} counter = 0 + self.output = pd.DataFrame(columns=['var_names', 'var_type', 'regime', 'equation']) for r in self.regimes_set: """ if is_win: @@ -482,16 +506,18 @@ def _error_regimes_multi( ] = results[r].e_filtered self.name_y += results[r].name_y self.name_x += results[r].name_x + results[r].other_top = _nonspat_top(results[r], ml=True) + self.output = pd.concat([self.output, pd.DataFrame({'var_names': results[r].name_x, + 'var_type': ['x'] * (len(results[r].name_x) - 1) + ['lambda'], + 'regime': r, 'equation': r})], ignore_index=True) counter += 1 self.chow = REGI.Chow(self) self.multi = results - SUMMARY.ML_Error_multi( - reg=self, multireg=self.multi, vm=vm, spat_diag=False, regimes=True, w=w - ) + output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) def _work_error( - y, x, regi_ids, r, w, method, epsilon, name_ds, name_y, name_x, name_w, name_regimes + y, x, regi_ids, r, w, slx_lags, method, epsilon, name_ds, name_y, name_x, name_w, name_regimes ): w_r, warn = REGI.w_regime(w, regi_ids[r], r, transform=True) y_r = y[regi_ids[r]] @@ -499,13 +525,10 @@ def _work_error( model = BaseML_Error(y=y_r, x=x_r, w=w_r, method=method, epsilon=epsilon) set_warn(model, warn) model.w = w_r - model.title = ( - "MAXIMUM LIKELIHOOD SPATIAL ERROR - REGIME " - + str(r) - + " (METHOD = " - + method - + ")" - ) + model.title = "ML SPATIAL ERROR" + if slx_lags >0: + 
model.title += " WITH SLX (SDEM)" + model.title += " - REGIME " + str(r) + " (METHOD = " + method + ")" model.name_ds = name_ds model.name_y = "%s_%s" % (str(r), name_y) model.name_x = ["%s_%s" % (str(r), i) for i in name_x] @@ -528,30 +551,23 @@ def _test(): if __name__ == "__main__": _test() import numpy as np - import libpysal + import libpysal as ps - db = libpysal.io.open(libpysal.examples.get_path("baltim.dbf"), "r") + db = ps.io.open(ps.examples.get_path("baltim.dbf"), "r") ds_name = "baltim.dbf" y_name = "PRICE" y = np.array(db.by_col(y_name)).T y.shape = (len(y), 1) x_names = ["NROOM", "NBATH", "PATIO", "FIREPL", "AC", "GAR", "AGE", "LOTSZ", "SQFT"] x = np.array([db.by_col(var) for var in x_names]).T - ww = ps.open(ps.examples.get_path("baltim_q.gal")) + ww = ps.io.open(ps.examples.get_path("baltim_q.gal")) w = ww.read() ww.close() w_name = "baltim_q.gal" w.transform = "r" + regimes = db.by_col("CITCOU") - regimes = [] - y_coord = np.array(db.by_col("Y")) - for i in y_coord: - if i > 544.5: - regimes.append("North") - else: - regimes.append("South") - - mlerror = ML_Error_Regimes( + model = ML_Error_Regimes( y, x, regimes, @@ -561,7 +577,9 @@ def _test(): name_x=x_names, name_w=w_name, name_ds=ds_name, - regime_err_sep=False, - name_regimes="North", + regime_err_sep=True, + constant_regi="many", + name_regimes="CITCOU", ) - print(mlerror.summary) + print(model.output) + print(model.summary) diff --git a/spreg/ml_lag.py b/spreg/ml_lag.py old mode 100644 new mode 100755 index 7d7e2e9f..c3ec794c --- a/spreg/ml_lag.py +++ b/spreg/ml_lag.py @@ -10,11 +10,12 @@ import numpy.linalg as la from scipy import sparse as sp from scipy.sparse.linalg import splu as SuperLU -from .utils import RegressionPropsY, RegressionPropsVM, inverse_prod, set_warn +from .utils import RegressionPropsY, RegressionPropsVM, inverse_prod, set_warn, get_lags from .sputils import spdot, spfill_diagonal, spinv, spbroadcast from . import diagnostics as DIAG from . import user_output as USER -from . import summary_output as SUMMARY +import pandas as pd +from .output import output, _nonspat_top, _spat_pseudo_r2 from .w_utils import symmetrize from libpysal import weights @@ -43,6 +44,9 @@ class BaseML_Lag(RegressionPropsY, RegressionPropsVM): independent (exogenous) variable, excluding the constant w : pysal W object Spatial weights object + slx_lags : integer + Number of spatial lags of X to include in the model specification. + If slx_lags>0, the specification becomes of the Spatial Durbin type. 
method : string if 'full', brute force calculation (full matrix expressions) if 'ord', Ord eigenvalue method @@ -179,25 +183,29 @@ class BaseML_Lag(RegressionPropsY, RegressionPropsVM): """ - def __init__(self, y, x, w, method="full", epsilon=0.0000001): + def __init__(self, y, x, w, slx_lags=0, method="full", epsilon=0.0000001): # set up main regression variables and spatial filters self.y = y self.x = x - self.n, self.k = self.x.shape self.method = method self.epsilon = epsilon # W = w.full()[0] # Wsp = w.sparse ylag = weights.lag_spatial(w, y) # b0, b1, e0 and e1 + + if slx_lags>0: + self.x = np.hstack((self.x, get_lags(w, self.x[:, 1:], slx_lags))) + + self.n, self.k = self.x.shape xtx = spdot(self.x.T, self.x) xtxi = la.inv(xtx) xty = spdot(self.x.T, self.y) xtyl = spdot(self.x.T, ylag) b0 = spdot(xtxi, xty) b1 = spdot(xtxi, xtyl) - e0 = self.y - spdot(x, b0) - e1 = ylag - spdot(x, b1) + e0 = self.y - spdot(self.x, b0) + e1 = ylag - spdot(self.x, b1) methodML = method.upper() # call minimizer using concentrated log-likelihood to get rho if methodML in ["FULL", "LU", "ORD"]: @@ -209,19 +217,19 @@ def __init__(self, y, x, w, method="full", epsilon=0.0000001): bounds=(-1.0, 1.0), args=(self.n, e0, e1, W), method="bounded", - tol=epsilon, + options={'xatol': epsilon}, ) elif methodML == "LU": I = sp.identity(w.n) Wsp = w.sparse # moved here - W = Wsp + W = Wsp#.tocsc() res = minimize_scalar( lag_c_loglik_sp, 0.0, bounds=(-1.0, 1.0), args=(self.n, e0, e1, I, Wsp), method="bounded", - tol=epsilon, + options={'xatol': epsilon}, ) elif methodML == "ORD": # check on symmetry structure @@ -239,7 +247,7 @@ def __init__(self, y, x, w, method="full", epsilon=0.0000001): bounds=(-1.0, 1.0), args=(self.n, e0, e1, evals), method="bounded", - tol=epsilon, + options={'xatol': epsilon}, ) else: # program will crash, need to catch @@ -261,7 +269,7 @@ def __init__(self, y, x, w, method="full", epsilon=0.0000001): self.u = e0 - self.rho * e1 self.predy = self.y - self.u - xb = spdot(x, b) + xb = spdot(self.x, b) self.predy_e = inverse_prod( w.sparse, xb, self.rho, inv_method="power_exp", threshold=epsilon @@ -289,7 +297,7 @@ def __init__(self, y, x, w, method="full", epsilon=0.0000001): wpredy = weights.lag_spatial(w, self.predy_e) wpyTwpy = spdot(wpredy.T, wpredy) - xTwpy = spdot(x.T, wpredy) + xTwpy = spdot(self.x.T, wpredy) # order of variables is beta, rho, sigma2 @@ -321,6 +329,9 @@ class ML_Lag(BaseML_Lag): independent (exogenous) variable, excluding the constant w : pysal W object Spatial weights object + slx_lags : integer + Number of spatial lags of X to include in the model specification. + If slx_lags>0, the specification becomes of the Spatial Durbin type. 
method : string if 'full', brute force calculation (full matrix expressions) if 'ord', Ord eigenvalue method @@ -337,9 +348,13 @@ class ML_Lag(BaseML_Lag): Name of weights matrix for use in output name_ds : string Name of dataset for use in output + latex : boolean + Specifies if summary is to be printed in latex format Attributes ---------- + output : dataframe + regression results pandas dataframe betas : array (k+1)x1 array of estimated coefficients (rho first) rho : float @@ -567,6 +582,7 @@ def __init__( y, x, w, + slx_lags=0, method="full", epsilon=0.0000001, vm=False, @@ -574,6 +590,7 @@ def __init__( name_x=None, name_w=None, name_ds=None, + latex=False, ): n = USER.check_arrays(y, x) y = USER.check_y(y, n) @@ -582,11 +599,16 @@ def __init__( set_warn(self, warn) method = method.upper() BaseML_Lag.__init__( - self, y=y, x=x_constant, w=w, method=method, epsilon=epsilon + self, y=y, x=x_constant, w=w, slx_lags=slx_lags, method=method, epsilon=epsilon ) # increase by 1 to have correct aic and sc, include rho in count self.k += 1 - self.title = "MAXIMUM LIKELIHOOD SPATIAL LAG" + " (METHOD = " + method + ")" + + if slx_lags>0: + name_x += USER.set_name_spatial_lags(name_x, slx_lags) + self.title = "MAXIMUM LIKELIHOOD SPATIAL LAG WITH SLX - SPATIAL DURBIN MODEL" + " (METHOD = " + method + ")" + else: + self.title = "MAXIMUM LIKELIHOOD SPATIAL LAG" + " (METHOD = " + method + ")" self.name_ds = USER.set_name_ds(name_ds) self.name_y = USER.set_name_y(name_y) self.name_x = USER.set_name_x(name_x, x) @@ -595,8 +617,12 @@ def __init__( self.name_w = USER.set_name_w(name_w, w) self.aic = DIAG.akaike(reg=self) self.schwarz = DIAG.schwarz(reg=self) - SUMMARY.ML_Lag(reg=self, w=w, vm=vm, spat_diag=False) - + self.output = pd.DataFrame(self.name_x, columns=['var_names']) + self.output['var_type'] = ['x'] * (len(self.name_x)-1) + ['rho'] + self.output['regime'], self.output['equation'] = (0, 0) + self.other_top = _spat_pseudo_r2(self) + self.other_top += _nonspat_top(self, ml=True) + output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) def lag_c_loglik(rho, n, e0, e1, W): # concentrated log-lik for lag model, no constants, brute force @@ -647,3 +673,29 @@ def _test(): np.set_printoptions(suppress=True) doctest.testmod() np.set_printoptions(suppress=start_suppress) + +if __name__ == "__main__": + _test() + + import numpy as np + import libpysal + + db = libpysal.io.open(libpysal.examples.get_path("columbus.dbf"), "r") + y_var = "CRIME" + y = np.array([db.by_col(y_var)]).reshape(49, 1) + x_var = ["INC"] + x = np.array([db.by_col(name) for name in x_var]).T + w = libpysal.weights.Rook.from_shapefile(libpysal.examples.get_path("columbus.shp")) + w.transform = "r" + model = ML_Lag( + y, + x, + w=w, + vm=False, + name_y=y_var, + name_x=x_var, + name_ds="columbus", + name_w="columbus.gal", + ) + print(model.output) + print(model.summary) diff --git a/spreg/ml_lag_regimes.py b/spreg/ml_lag_regimes.py index 4e603d09..a05c00b2 100644 --- a/spreg/ml_lag_regimes.py +++ b/spreg/ml_lag_regimes.py @@ -7,12 +7,14 @@ import numpy as np from . import regimes as REGI from . import user_output as USER -from . import summary_output as SUMMARY from . 
import diagnostics as DIAG import multiprocessing as mp from .ml_lag import BaseML_Lag -from .utils import set_warn +from .utils import set_warn, get_lags from platform import system +import pandas as pd +from .output import output, _nonspat_top, _spat_pseudo_r2 + __all__ = ["ML_Lag_Regimes"] @@ -55,6 +57,9 @@ class ML_Lag_Regimes(BaseML_Lag, REGI.Regimes_Frame): if 'LU', LU sparse matrix decomposition epsilon : float tolerance criterion in mimimize_scalar function and inverse_product + slx_lags : integer + Number of spatial lags of X to include in the model specification. + If slx_lags>0, the specification becomes of the Spatial Durbin type. regime_lag_sep: boolean If True, the spatial parameter for spatial lag is also computed according to different regimes. If False (default), @@ -76,9 +81,13 @@ class ML_Lag_Regimes(BaseML_Lag, REGI.Regimes_Frame): Name of dataset for use in output name_regimes : string Name of regimes variable for use in output + latex : boolean + Specifies if summary is to be printed in latex format Attributes ---------- + output : dataframe + regression results pandas dataframe summary : string Summary of regression results and diagnostics (note: use in conjunction with the print command) @@ -297,8 +306,8 @@ def __init__( cols2regi="all", method="full", epsilon=0.0000001, + slx_lags=0, regime_lag_sep=False, - regime_err_sep=False, cores=False, vm=False, name_y=None, @@ -306,6 +315,7 @@ def __init__( name_w=None, name_ds=None, name_regimes=None, + latex=False, ): n = USER.check_arrays(y, x) @@ -318,6 +328,11 @@ def __init__( set_warn(self, warn) name_x = USER.set_name_x(name_x, x_constant, constant=True) + if slx_lags >0: + lag_x = get_lags(w, x_constant, slx_lags) + x_constant = np.hstack((x_constant, lag_x)) + name_x += USER.set_name_spatial_lags(name_x, slx_lags) + self.name_x_r = USER.set_name_x(name_x, x_constant) + [USER.set_name_yend_sp(name_y)] self.method = method self.epsilon = epsilon @@ -364,6 +379,7 @@ def __init__( w_i, w, regi_ids, + slx_lags=slx_lags, cores=cores, cols2regi=cols2regi, method=method, @@ -374,17 +390,19 @@ def __init__( name_regimes=self.name_regimes, name_w=name_w, name_ds=name_ds, + latex=latex, ) else: # if regime_lag_sep == True: # w = REGI.w_regimes_union(w, w_i, self.regimes_set) - x, self.name_x = REGI.Regimes_Frame.__init__( + x, self.name_x, x_rlist = REGI.Regimes_Frame.__init__( self, x_constant, regimes, constant_regi, cols2regi=cols2regi[:-1], names=name_x, + rlist=True ) self.name_x.append("_Global_" + USER.set_name_yend_sp(name_y)) BaseML_Lag.__init__(self, y=y, x=x, w=w, method=method, epsilon=epsilon) @@ -395,13 +413,17 @@ def __init__( self.aic = DIAG.akaike(reg=self) self.schwarz = DIAG.schwarz(reg=self) self.regime_lag_sep = regime_lag_sep - self.title = ( - "MAXIMUM LIKELIHOOD SPATIAL LAG - REGIMES" - + " (METHOD = " - + method - + ")" - ) - SUMMARY.ML_Lag(reg=self, w=w, vm=vm, spat_diag=False, regimes=True) + self.output = pd.DataFrame(self.name_x, columns=['var_names']) + self.output['var_type'] = ['x'] * (len(self.name_x) - 1) + ['rho'] + self.output['regime'] = x_rlist+['_Global'] + self.output['equation'] = 0 + self.other_top = _spat_pseudo_r2(self) + self.other_top += _nonspat_top(self, ml=True) + if slx_lags == 0: + self.title = ("MAXIMUM LIKELIHOOD SPATIAL LAG - REGIMES"+ " (METHOD = "+ method+ ")") + else: + self.title = ("MAXIMUM LIKELIHOOD SPATIAL DURBIN - REGIMES"+ " (METHOD = "+ method+ ")") + output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) def ML_Lag_Regimes_Multi( self, @@ -410,6 
+432,7 @@ def ML_Lag_Regimes_Multi( w_i, w, regi_ids, + slx_lags, cores, cols2regi, method, @@ -420,8 +443,9 @@ def ML_Lag_Regimes_Multi( name_regimes, name_w, name_ds, + latex, ): - # pool = mp.Pool(cores) + #pool = mp.Pool(cores) results_p = {} """ for r in self.regimes_set: @@ -445,6 +469,7 @@ def ML_Lag_Regimes_Multi( regi_ids, r, w_i[r], + slx_lags, method, epsilon, name_ds, @@ -462,6 +487,7 @@ def ML_Lag_Regimes_Multi( regi_ids, r, w_i[r], + slx_lags, method, epsilon, name_ds, @@ -496,6 +522,7 @@ def ML_Lag_Regimes_Multi( results = {} self.name_y, self.name_x = [], [] counter = 0 + self.output = pd.DataFrame(columns=['var_names', 'var_type', 'regime', 'equation']) for r in self.regimes_set: """ if is_win: @@ -528,12 +555,16 @@ def ML_Lag_Regimes_Multi( ] = results[r].e_pred self.name_y += results[r].name_y self.name_x += results[r].name_x + results[r].other_top = _spat_pseudo_r2(results[r]) + results[r].other_top += _nonspat_top(results[r], ml=True) + results[r].other_mid = "" + self.output = pd.concat([self.output, pd.DataFrame({'var_names': results[r].name_x, + 'var_type': ['x'] * (len(results[r].name_x) - 1) + ['rho'], + 'regime': r, 'equation': r})], ignore_index=True) counter += 1 self.multi = results self.chow = REGI.Chow(self) - SUMMARY.ML_Lag_multi( - reg=self, multireg=self.multi, vm=vm, spat_diag=False, regimes=True, w=w - ) + output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) def _work( @@ -542,6 +573,7 @@ def _work( regi_ids, r, w_r, + slx_lags, method, epsilon, name_ds, @@ -553,13 +585,10 @@ def _work( y_r = y[regi_ids[r]] x_r = x[regi_ids[r]] model = BaseML_Lag(y_r, x_r, w_r, method=method, epsilon=epsilon) - model.title = ( - "MAXIMUM LIKELIHOOD SPATIAL LAG - REGIME " - + str(r) - + " (METHOD = " - + method - + ")" - ) + if slx_lags == 0: + model.title = ("MAXIMUM LIKELIHOOD SPATIAL LAG - REGIME "+ str(r)+ " (METHOD = "+ method+ ")") + else: + model.title = ("MAXIMUM LIKELIHOOD SPATIAL DURBIN - REGIME "+ str(r)+ " (METHOD = "+ method+ ")") model.name_ds = name_ds model.name_y = "%s_%s" % (str(r), name_y) model.name_x = ["%s_%s" % (str(r), i) for i in name_x] @@ -583,16 +612,16 @@ def _test(): if __name__ == "__main__": _test() import numpy as np - import libpysal + import libpysal as ps - db = libpysal.io.open(libpysal.examples.get_path("baltim.dbf"), "r") + db = ps.io.open(ps.examples.get_path("baltim.dbf"), "r") ds_name = "baltim.dbf" y_name = "PRICE" y = np.array(db.by_col(y_name)).T y.shape = (len(y), 1) x_names = ["NROOM", "NBATH", "PATIO", "FIREPL", "AC", "GAR", "AGE", "LOTSZ", "SQFT"] x = np.array([db.by_col(var) for var in x_names]).T - ww = ps.open(ps.examples.get_path("baltim_q.gal")) + ww = ps.io.open(ps.examples.get_path("baltim_q.gal")) w = ww.read() ww.close() w_name = "baltim_q.gal" @@ -610,7 +639,9 @@ def _test(): name_w=w_name, name_ds=ds_name, regime_lag_sep=True, + regime_err_sep=False, constant_regi="many", name_regimes="CITCOU", ) + print(mllag.output) print(mllag.summary) diff --git a/spreg/ols.py b/spreg/ols.py index eb739230..f22bac19 100644 --- a/spreg/ols.py +++ b/spreg/ols.py @@ -1,13 +1,13 @@ """Ordinary Least Squares regression classes.""" -__author__ = "Luc Anselin luc.anselin@asu.edu, David C. Folch david.folch@asu.edu" +__author__ = "Luc Anselin lanselin@gmail.com, Pedro Amaral pedrovma@gmail.com, David C. Folch david.folch@asu.edu" import numpy as np -import copy as COPY import numpy.linalg as la from . import user_output as USER -from . 
import summary_output as SUMMARY +from .output import output, _spat_diag_out, _nonspat_mid, _nonspat_top from . import robust as ROBUST -from .utils import spdot, sphstack, RegressionPropsY, RegressionPropsVM, set_warn +from .utils import spdot, RegressionPropsY, RegressionPropsVM, set_warn, get_lags +import pandas as pd __all__ = ["OLS"] @@ -144,6 +144,9 @@ class OLS(BaseOLS): gwk : pysal W object Kernel spatial weights needed for HAC estimation. Note: matrix must have ones along the main diagonal. + slx_lags : integer + Number of spatial lags of X to include in the model specification. + If slx_lags>0, the specification becomes of the SLX type. sig2n_k : boolean If True, then use n-k to estimate sigma^2. If False, use n. nonspat_diag : boolean @@ -171,10 +174,13 @@ class OLS(BaseOLS): Name of kernel weights matrix for use in output name_ds : string Name of dataset for use in output - + latex : boolean + Specifies if summary is to be printed in latex format Attributes ---------- + output : dataframe + regression results pandas dataframe summary : string Summary of regression results and diagnostics (note: use in conjunction with the print command) @@ -349,8 +355,9 @@ class OLS(BaseOLS): ready to be printed: >>> print(ols.summary) - REGRESSION - ---------- + REGRESSION RESULTS + ------------------ + SUMMARY OF OUTPUT: ORDINARY LEAST SQUARES ----------------------------------------- Data set : columbus @@ -360,7 +367,7 @@ class OLS(BaseOLS): S.D. dependent var : 18.4661 Degrees of Freedom : 46 R-squared : 0.3495 Adjusted R-squared : 0.3212 - Sum squared residual: 10647.015 F-statistic : 12.3582 + Sum squared residual: 10647 F-statistic : 12.3582 Sigma-square : 231.457 Prob(F-statistic) : 5.064e-05 S.E. of regression : 15.214 Log likelihood : -201.368 Sigma-square ML : 217.286 Akaike info criterion : 408.735 @@ -426,6 +433,7 @@ def __init__( w=None, robust=None, gwk=None, + slx_lags = 0, sig2n_k=True, nonspat_diag=True, spat_diag=False, @@ -437,34 +445,57 @@ def __init__( name_w=None, name_gwk=None, name_ds=None, + latex=False, ): n = USER.check_arrays(y, x) y = USER.check_y(y, n) - USER.check_weights(w, y) USER.check_robust(robust, gwk) + if robust == "hac" and spat_diag: + set_warn( + self, + "Spatial diagnostics are not available for HAC estimation. 
Hence, spatial diagnostics have been disabled for this model.", + ) + spat_diag = False + if robust in ["hac", "white"] and white_test: + set_warn( + self, + "White test not available when standard errors are estimated by HAC or White correction.", + ) + white_test = False USER.check_spat_diag(spat_diag, w) x_constant, name_x, warn = USER.check_constant(x, name_x) + self.name_x = USER.set_name_x(name_x, x_constant) + if slx_lags >0: + USER.check_weights(w, y, w_required=True) + lag_x = get_lags(w, x_constant[:, 1:], slx_lags) + x_constant = np.hstack((x_constant, lag_x)) + self.name_x += USER.set_name_spatial_lags(self.name_x[1:], slx_lags) + else: + USER.check_weights(w, y, w_required=False) set_warn(self, warn) BaseOLS.__init__( self, y=y, x=x_constant, robust=robust, gwk=gwk, sig2n_k=sig2n_k ) - self.title = "ORDINARY LEAST SQUARES" self.name_ds = USER.set_name_ds(name_ds) self.name_y = USER.set_name_y(name_y) - self.name_x = USER.set_name_x(name_x, x_constant) + self.title = "ORDINARY LEAST SQUARES" + if slx_lags > 0: + self.title += " WITH SPATIALLY LAGGED X (SLX)" self.robust = USER.set_robust(robust) self.name_w = USER.set_name_w(name_w, w) self.name_gwk = USER.set_name_w(name_gwk, gwk) - SUMMARY.OLS( - reg=self, - vm=vm, - w=w, - nonspat_diag=nonspat_diag, - spat_diag=spat_diag, - moran=moran, - white_test=white_test, - ) + self.output = pd.DataFrame(self.name_x, columns=['var_names']) + self.output['var_type'] = ['x'] * len(self.name_x) + self.output['regime'], self.output['equation'] = (0, 0) + self.other_top, self.other_mid, other_end = ("", "", "") # strings where function-specific diag. are stored + if nonspat_diag: + self.other_mid += _nonspat_mid(self, white_test=white_test) + self.other_top += _nonspat_top(self) + if spat_diag: + other_end += _spat_diag_out(self, w, 'ols', moran=moran) + output(reg=self, vm=vm, robust=robust, other_end=other_end, latex=latex) + def _test(): @@ -505,4 +536,5 @@ def _test(): sig2n_k=True, moran=True, ) + print(ols.output) print(ols.summary) diff --git a/spreg/ols_regimes.py b/spreg/ols_regimes.py index bf06f1f2..20b2acea 100755 --- a/spreg/ols_regimes.py +++ b/spreg/ols_regimes.py @@ -2,23 +2,20 @@ Ordinary Least Squares regression with regimes. """ -__author__ = "Luc Anselin luc.anselin@asu.edu, Pedro V. Amaral pedro.amaral@asu.edu, Daniel Arribas-Bel darribas@asu.edu" +__author__ = "Luc Anselin, Pedro V. Amaral, Daniel Arribas-Bel" +import numpy as np +import multiprocessing as mp +import pandas as pd from . import regimes as REGI from . import user_output as USER +from .utils import set_warn, RegressionProps_basic, spdot, RegressionPropsY, get_lags from .ols import BaseOLS -from .utils import set_warn, spbroadcast, RegressionProps_basic, RegressionPropsY, spdot from .robust import hac_multi -from . import summary_output as SUMMARY -import numpy as np -import multiprocessing as mp -from platform import system -import scipy.sparse as SP -import copy as COPY +from .output import output, _spat_diag_out, _nonspat_mid, _nonspat_top class OLS_Regimes(BaseOLS, REGI.Regimes_Frame, RegressionPropsY): - """ Ordinary least squares with results and diagnostics. @@ -43,6 +40,10 @@ class OLS_Regimes(BaseOLS, REGI.Regimes_Frame, RegressionPropsY): gwk : pysal W object Kernel spatial weights needed for HAC estimation. Note: matrix must have ones along the main diagonal. + slx_lags : integer + Number of spatial lags of X to include in the model specification. + If slx_lags>0, the specification becomes of the SLX type. 
+ Note: WX is computed using the complete weights matrix sig2n_k : boolean If True, then use n-k to estimate sigma^2. If False, use n. nonspat_diag : boolean @@ -92,10 +93,13 @@ class OLS_Regimes(BaseOLS, REGI.Regimes_Frame, RegressionPropsY): Name of dataset for use in output name_regimes : string Name of regime variable for use in the output - + latex : boolean + Specifies if summary is to be printed in latex format Attributes ---------- + output : dataframe + regression results pandas dataframe summary : string Summary of regression results and diagnostics (note: use in conjunction with the print command) @@ -307,7 +311,7 @@ class OLS_Regimes(BaseOLS, REGI.Regimes_Frame, RegressionPropsY): >>> y_var = 'HR90' >>> y = db.by_col(y_var) - >>> y = np.array(y).reshape(len(y), 1) + >>> y = np.array(y) Extract UE90 (unemployment rate) and PS90 (population structure) vectors from the DBF to be used as independent variables in the regression. Other variables @@ -327,58 +331,122 @@ class OLS_Regimes(BaseOLS, REGI.Regimes_Frame, RegressionPropsY): We can now run the regression and then have a summary of the output by typing: olsr.summary - Alternatively, we can just check the betas and standard errors of the - parameters: >>> olsr = OLS_Regimes(y, x, regimes, nonspat_diag=False, name_y=y_var, name_x=['PS90','UE90'], name_regimes=r_var, name_ds='NAT') - >>> olsr.betas - array([[0.39642899], - [0.65583299], - [0.48703937], - [5.59835 ], - [1.16210453], - [0.53163886]]) - >>> np.sqrt(olsr.vm.diagonal()) - array([0.24816345, 0.09662678, 0.03628629, 0.46894564, 0.21667395, - 0.05945651]) - >>> olsr.cols2regi - 'all' + >>> print(olsr.summary) + REGRESSION RESULTS + ------------------ + + SUMMARY OF OUTPUT: ORDINARY LEAST SQUARES ESTIMATION - REGIME 0 + --------------------------------------------------------------- + Data set : NAT + Weights matrix : None + Dependent Variable : 0_HR90 Number of Observations: 1673 + Mean dependent var : 3.3416 Number of Variables : 3 + S.D. dependent var : 4.6795 Degrees of Freedom : 1670 + R-squared : 0.1271 + Adjusted R-squared : 0.1260 + + ------------------------------------------------------------------------------------ + Variable Coefficient Std.Error t-Statistic Probability + ------------------------------------------------------------------------------------ + 0_CONSTANT 0.3964290 0.2481634 1.5974512 0.1103544 + 0_PS90 0.6558330 0.0966268 6.7872800 0.0000000 + 0_UE90 0.4870394 0.0362863 13.4221336 0.0000000 + ------------------------------------------------------------------------------------ + Regimes variable: SOUTH + + SUMMARY OF OUTPUT: ORDINARY LEAST SQUARES ESTIMATION - REGIME 1 + --------------------------------------------------------------- + Data set : NAT + Weights matrix : None + Dependent Variable : 1_HR90 Number of Observations: 1412 + Mean dependent var : 9.5493 Number of Variables : 3 + S.D. 
dependent var : 7.0389 Degrees of Freedom : 1409 + R-squared : 0.0661 + Adjusted R-squared : 0.0647 + + ------------------------------------------------------------------------------------ + Variable Coefficient Std.Error t-Statistic Probability + ------------------------------------------------------------------------------------ + 1_CONSTANT 5.5983500 0.4689456 11.9381640 0.0000000 + 1_PS90 1.1621045 0.2166740 5.3633790 0.0000001 + 1_UE90 0.5316389 0.0594565 8.9416422 0.0000000 + ------------------------------------------------------------------------------------ + Regimes variable: SOUTH + ------------------------------------------------------------------------------------ + GLOBAL DIAGNOSTICS + + REGIMES DIAGNOSTICS - CHOW TEST + VARIABLE DF VALUE PROB + CONSTANT 1 96.129 0.0000 + PS90 1 4.554 0.0328 + UE90 1 0.410 0.5220 + Global test 3 680.960 0.0000 + ================================ END OF REPORT ===================================== """ def __init__( - self, - y, - x, - regimes, - w=None, - robust=None, - gwk=None, - sig2n_k=True, - nonspat_diag=True, - spat_diag=False, - moran=False, - white_test=False, - vm=False, - constant_regi="many", - cols2regi="all", - regime_err_sep=True, - cores=False, - name_y=None, - name_x=None, - name_regimes=None, - name_w=None, - name_gwk=None, - name_ds=None, + self, + y, + x, + regimes, + w=None, + robust=None, + gwk=None, + slx_lags=0, + sig2n_k=True, + nonspat_diag=True, + spat_diag=False, + moran=False, + white_test=False, + vm=False, + constant_regi="many", + cols2regi="all", + regime_err_sep=True, + cores=False, + name_y=None, + name_x=None, + name_regimes=None, + name_w=None, + name_gwk=None, + name_ds=None, + latex=False ): n = USER.check_arrays(y, x) y = USER.check_y(y, n) - USER.check_weights(w, y) USER.check_robust(robust, gwk) + if robust == "hac": + if regime_err_sep: + set_warn( + self, + "Error by regimes is not available for HAC estimation. The error by regimes has been disabled for this model.", + ) + regime_err_sep = False + if spat_diag: + set_warn( + self, + "Spatial diagnostics are not available for HAC estimation. The spatial diagnostics have been disabled for this model.", + ) + spat_diag = False + if robust in ["hac", "white"] and white_test: + set_warn( + self, + "White test not available when standard errors are estimated by HAC or White correction.", + ) + white_test = False USER.check_spat_diag(spat_diag, w) x_constant, name_x, warn = USER.check_constant(x, name_x, just_rem=True) - set_warn(self, warn) name_x = USER.set_name_x(name_x, x_constant, constant=True) + if slx_lags > 0: + USER.check_weights(w, y, w_required=True) + lag_x = get_lags(w, x_constant, slx_lags) + x_constant = np.hstack((x_constant, lag_x)) + name_x += USER.set_name_spatial_lags(name_x, slx_lags) + else: + USER.check_weights(w, y, w_required=False) + set_warn(self, warn) self.name_x_r = USER.set_name_x(name_x, x_constant) self.constant_regi = constant_regi self.cols2regi = cols2regi @@ -394,17 +462,11 @@ def __init__( self.regimes_set = REGI._get_regimes_set(regimes) self.regimes = regimes USER.check_regimes(self.regimes_set, self.n, x_constant.shape[1]) - if regime_err_sep == True and robust == "hac": - set_warn( - self, - "Error by regimes is incompatible with HAC estimation. 
Hence, error by regimes has been disabled for this model.", - ) - regime_err_sep = False self.regime_err_sep = regime_err_sep if ( - regime_err_sep == True - and set(cols2regi) == set([True]) - and constant_regi == "many" + regime_err_sep == True + and set(cols2regi) == set([True]) + and constant_regi == "many" ): self.y = y regi_ids = dict( @@ -416,6 +478,7 @@ def __init__( regi_ids, cores, gwk, + slx_lags, sig2n_k, robust, nonspat_diag, @@ -424,11 +487,19 @@ def __init__( name_x, moran, white_test, + latex ) else: - x, self.name_x = REGI.Regimes_Frame.__init__( - self, x_constant, regimes, constant_regi, cols2regi, name_x + x, self.name_x, x_rlist = REGI.Regimes_Frame.__init__( + self, x_constant, regimes, constant_regi, cols2regi, name_x, rlist=True ) + + self.output = pd.DataFrame(self.name_x, + columns=['var_names']) + self.output['var_type'] = ['x'] * len(self.name_x) + self.output['regime'] = x_rlist + self.output['equation'] = 0 + BaseOLS.__init__(self, y=y, x=x, robust=robust, gwk=gwk, sig2n_k=sig2n_k) if regime_err_sep == True and robust == None: y2, x2 = REGI._get_weighted_var( @@ -439,41 +510,45 @@ def __init__( self.title = ( "ORDINARY LEAST SQUARES - REGIMES (Group-wise heteroskedasticity)" ) + if slx_lags > 0: + self.title = "ORDINARY LEAST SQUARES WITH SLX - REGIMES (Group-wise heteroskedasticity)" nonspat_diag = None set_warn( self, "Residuals treated as homoskedastic for the purpose of diagnostics.", ) else: - self.title = "ORDINARY LEAST SQUARES - REGIMES" + if slx_lags == 0: + self.title = "ORDINARY LEAST SQUARES - REGIMES" + else: + self.title = "ORDINARY LEAST SQUARES WITH SLX - REGIMES" self.robust = USER.set_robust(robust) self.chow = REGI.Chow(self) - SUMMARY.OLS( - reg=self, - vm=vm, - w=w, - nonspat_diag=nonspat_diag, - spat_diag=spat_diag, - moran=moran, - white_test=white_test, - regimes=True, - ) + self.other_top, self.other_mid, other_end = ("", "", "") # strings where function-specific diag. 
are stored + if nonspat_diag: + self.other_mid += _nonspat_mid(self, white_test=white_test) + self.other_top += _nonspat_top(self) + if spat_diag: + other_end += _spat_diag_out(self, w, 'ols', moran=moran) + output(reg=self, vm=vm, robust=robust, other_end=other_end, latex=latex) def _ols_regimes_multi( - self, - x, - w, - regi_ids, - cores, - gwk, - sig2n_k, - robust, - nonspat_diag, - spat_diag, - vm, - name_x, - moran, - white_test, + self, + x, + w, + regi_ids, + cores, + gwk, + slx_lags, + sig2n_k, + robust, + nonspat_diag, + spat_diag, + vm, + name_x, + moran, + white_test, + latex ): results_p = {} """ @@ -506,6 +581,7 @@ def _ols_regimes_multi( name_x, self.name_w, self.name_regimes, + slx_lags ), ) else: @@ -523,6 +599,7 @@ def _ols_regimes_multi( name_x, self.name_w, self.name_regimes, + slx_lags ) ) self.kryd = 0 @@ -545,6 +622,7 @@ def _ols_regimes_multi( results = {} self.name_y, self.name_x = [], [] counter = 0 + self.output = pd.DataFrame(columns=['var_names', 'var_type', 'regime', 'equation']) for r in self.regimes_set: """ if is_win: @@ -558,11 +636,11 @@ def _ols_regimes_multi( results[r] = results_p[r].get() self.vm[ - (counter * self.kr) : ((counter + 1) * self.kr), - (counter * self.kr) : ((counter + 1) * self.kr), + (counter * self.kr): ((counter + 1) * self.kr), + (counter * self.kr): ((counter + 1) * self.kr), ] = results[r].vm self.betas[ - (counter * self.kr) : ((counter + 1) * self.kr), + (counter * self.kr): ((counter + 1) * self.kr), ] = results[r].betas self.u[ regi_ids[r], @@ -572,25 +650,24 @@ def _ols_regimes_multi( ] = results[r].predy self.name_y += results[r].name_y self.name_x += results[r].name_x + self.output = pd.concat([self.output, pd.DataFrame({'var_names': results[r].name_x, + 'var_type': ['x']*len(results[r].name_x), + 'regime': r, 'equation': r})], ignore_index=True) + results[r].other_top, results[r].other_mid = ("", "") + if nonspat_diag: + results[r].other_mid += _nonspat_mid(results[r], white_test=white_test) + results[r].other_top += _nonspat_top(results[r]) counter += 1 self.multi = results self.hac_var = x_constant[:, 1:] if robust == "hac": hac_multi(self, gwk) self.chow = REGI.Chow(self) + other_end = "" if spat_diag: self._get_spat_diag_props(x_constant, sig2n_k) - SUMMARY.OLS_multi( - reg=self, - multireg=self.multi, - vm=vm, - nonspat_diag=nonspat_diag, - spat_diag=spat_diag, - moran=moran, - white_test=white_test, - regimes=True, - w=w, - ) + other_end += _spat_diag_out(self, w, 'ols', moran=moran) + output(reg=self, vm=vm, robust=robust, other_end=other_end, latex=latex) def _get_spat_diag_props(self, x, sig2n_k): self.k = self.kr @@ -603,7 +680,7 @@ def _get_spat_diag_props(self, x, sig2n_k): def _work( - y, x, w, regi_ids, r, robust, sig2n_k, name_ds, name_y, name_x, name_w, name_regimes + y, x, w, regi_ids, r, robust, sig2n_k, name_ds, name_y, name_x, name_w, name_regimes, slx_lags ): y_r = y[regi_ids[r]] x_r = x[regi_ids[r]] @@ -612,7 +689,10 @@ def _work( if robust == "hac": robust = None model = BaseOLS(y_r, x_r, robust=robust, sig2n_k=sig2n_k) - model.title = "ORDINARY LEAST SQUARES ESTIMATION - REGIME %s" % r + if slx_lags == 0: + model.title = "ORDINARY LEAST SQUARES ESTIMATION - REGIME %s" % r + else: + model.title = "ORDINARY LEAST SQUARES ESTIMATION WITH SLX - REGIME %s" % r model.robust = USER.set_robust(robust) model.name_ds = name_ds model.name_y = "%s_%s" % (str(r), name_y) @@ -639,32 +719,35 @@ def _test(): _test() import numpy as np import libpysal + import pysal db = 
libpysal.io.open(libpysal.examples.get_path("NAT.dbf"), "r") - y_var = "CRIME" - y = np.array([db.by_col(y_var)]).reshape(49, 1) - x_var = ["INC", "HOVAL"] + y_var = "HR90" + y = np.array(db.by_col(y_var)).reshape(-1,1) + x_var = ['PS90','UE90'] x = np.array([db.by_col(name) for name in x_var]).T - r_var = "NSA" + r_var = "SOUTH" regimes = db.by_col(r_var) - w = libpysal.weights.Rook.from_shapefile(libpysal.examples.get_path("columbus.shp")) + w = libpysal.weights.Rook.from_shapefile(libpysal.examples.get_path("NAT.shp")) w.transform = "r" + #olsr = pysal.model.spreg.OLS_Regimes( olsr = OLS_Regimes( - y, + y, x, regimes, w=w, constant_regi="many", - nonspat_diag=False, - spat_diag=False, + nonspat_diag=True, + spat_diag=True, name_y=y_var, - name_x=["INC", "HOVAL"], - name_ds="columbus", + name_x=x_var, + name_ds="NAT", name_regimes=r_var, - name_w="columbus.gal", regime_err_sep=True, cols2regi=[True, True], - sig2n_k=True, - robust="white", + sig2n_k=False, + white_test=True, + #robust="white" ) + print(olsr.output) print(olsr.summary) diff --git a/spreg/output.py b/spreg/output.py new file mode 100644 index 00000000..c871c842 --- /dev/null +++ b/spreg/output.py @@ -0,0 +1,476 @@ +"""Internal helper files for user output.""" + +__author__ = "Luc Anselin, Pedro V. Amaral" + +import textwrap as TW +import numpy as np +import pandas as pd +from . import diagnostics as diagnostics +from . import diagnostics_tsls as diagnostics_tsls +from . import diagnostics_sp as diagnostics_sp + +__all__ = [] + +############################################################################### +############### Primary functions for running summary diagnostics ############# +############################################################################### + +def output(reg, vm, other_end=False, robust=False, latex=False): + strSummary = output_start(reg) + for eq in reg.output['equation'].unique(): + try: + reg.multi[eq].__summary = {} + strSummary, reg.multi[eq] = out_part_top(strSummary, reg, eq) + except: + eq = None + strSummary, reg = out_part_top(strSummary, reg, eq) + strSummary, reg = out_part_middle(strSummary, reg, robust, m=eq) + strSummary, reg = out_part_end(strSummary, reg, vm, other_end, m=eq) + reg.summary = strSummary + reg.output.sort_values(by=['equation', 'regime'], inplace=True) + reg.output.drop(['var_type', 'regime', 'equation'], axis=1, inplace=True) + +def output_start(reg): + reg.__summary = {} + strSummary = "REGRESSION RESULTS\n" + strSummary += "------------------\n" + reg.output = reg.output.assign(coefficients=[None] * len(reg.output), std_err=[None] * len(reg.output), + zt_stat=[None] * len(reg.output), prob=[None] * len(reg.output)) + return strSummary + +def out_part_top(strSummary, reg, m): + # Top part of summary output. + # m = None for single models, m = 1,2,3... 
for multiple equation models + if m == None: + _reg = reg # _reg = local object with regression results + else: + _reg = reg.multi[m] # _reg = local object with equation specific regression results + title = "\nSUMMARY OF OUTPUT: " + _reg.title + "\n" + strSummary += title + strSummary += "-" * (len(title) - 2) + "\n" + strSummary += "%-20s:%12s\n" % ("Data set", _reg.name_ds) + try: + strSummary += "%-20s:%12s\n" % ("Weights matrix", _reg.name_w) + except: + pass + + strSummary += "%-20s:%12s %-22s:%12d\n" % ( + "Dependent Variable", + _reg.name_y, + "Number of Observations", + _reg.n, + ) + + strSummary += "%-20s:%12.4f %-22s:%12d\n" % ( + "Mean dependent var", + _reg.mean_y, + "Number of Variables", + _reg.k, + ) + strSummary += "%-20s:%12.4f %-22s:%12d\n" % ( + "S.D. dependent var", + _reg.std_y, + "Degrees of Freedom", + _reg.n - _reg.k, + ) + + _reg.std_err = diagnostics.se_betas(_reg) + if 'OLS' in reg.__class__.__name__: + _reg.t_stat = diagnostics.t_stat(_reg) + _reg.r2 = diagnostics.r2(_reg) + _reg.ar2 = diagnostics.ar2(_reg) + strSummary += "%-20s:%12.4f\n%-20s:%12.4f\n" % ( + "R-squared", + _reg.r2, + "Adjusted R-squared", + _reg.ar2, + ) + _reg.__summary["summary_zt"] = "t" + + else: + _reg.z_stat = diagnostics.t_stat(_reg, z_stat=True) + _reg.pr2 = diagnostics_tsls.pr2_aspatial(_reg) + strSummary += "%-20s:%12.4f\n" % ("Pseudo R-squared", _reg.pr2) + _reg.__summary["summary_zt"] = "z" + + try: # Adding additional top part if there is one + strSummary += _reg.other_top + except: + pass + + return (strSummary, _reg) + +def out_part_middle(strSummary, reg, robust, m=None): + # Middle part of summary output. + # m = None for single models, m = 1,2,3... for multiple equation models + if m==None: + _reg = reg #_reg = local object with regression results + m = reg.output['equation'].unique()[0] + else: + _reg = reg.multi[m] #_reg = local object with equation specific regression results + coefs = pd.DataFrame(_reg.betas, columns=['coefficients']) + coefs['std_err'] = pd.DataFrame(_reg.std_err) + try: + coefs = pd.concat([coefs, pd.DataFrame(_reg.z_stat, columns=['zt_stat', 'prob'])], axis=1) + except AttributeError: + coefs = pd.concat([coefs, pd.DataFrame(_reg.t_stat, columns=['zt_stat', 'prob'])], axis=1) + coefs.index = reg.output[reg.output['equation'] == m].index + reg.output.update(coefs) + strSummary += "\n" + if robust: + if robust == "white": + strSummary += "White Standard Errors\n" + elif robust == "hac": + strSummary += "HAC Standard Errors; Kernel Weights: " + _reg.name_gwk + "\n" + elif robust == "ogmm": + strSummary += "Optimal GMM used to estimate the coefficients and the variance-covariance matrix\n" + strSummary += "------------------------------------------------------------------------------------\n" + strSummary += ( + " Variable Coefficient Std.Error %1s-Statistic Probability\n" + % (_reg.__summary["summary_zt"]) + ) + strSummary += "------------------------------------------------------------------------------------\n" + m_output = reg.output[reg.output['equation'] == m] + for row in m_output.iloc[np.lexsort((m_output.index, m_output['regime']))].itertuples(): + try: + strSummary += "%20s %12.5f %12.5f %12.5f %12.5f\n" % ( + row.var_names, + row.coefficients, + row.std_err, + row.zt_stat, + row.prob + ) + except TypeError: # special case for models that do not have inference on the lambda term + strSummary += "%20s %12.5f \n" % ( + row.var_names, + row.coefficients + ) + strSummary += 
"------------------------------------------------------------------------------------\n" + + try: # Adding info on instruments if they are present + name_q = _reg.name_q + name_yend = _reg.name_yend + insts = "Instruments: " + for name in sorted(name_q): + insts += name + ", " + text_wrapper = TW.TextWrapper(width=76, subsequent_indent=" ") + insts = text_wrapper.fill(insts[:-2]) + insts += "\n" + inst2 = "Instrumented: " + for name in sorted(name_yend): + inst2 += name + ", " + text_wrapper = TW.TextWrapper(width=76, subsequent_indent=" ") + inst2 = text_wrapper.fill(inst2[:-2]) + inst2 += "\n" + inst2 += insts + strSummary += inst2 + except: + pass + + try: # Adding info on regimes if they are present + strSummary += ("Regimes variable: %s\n" % _reg.name_regimes) + strSummary += _summary_chow(_reg) # If local regimes present, compute Chow test. + except: + pass + + try: # Adding local warning if there is one + if _reg.warning: + strSummary += _reg.warning + except: + pass + + try: # Adding other middle part if there are any + strSummary += _reg.other_mid + except: + pass + + return (strSummary, reg) + +def out_part_end(strSummary, reg, vm, other_end, m=None): + if m is not None: + strSummary += "------------------------------------------------------------------------------------\n" + try: # Adding global warning if there is one + strSummary += reg.warning + except: + pass + strSummary += "\nGLOBAL DIAGNOSTICS\n" + try: # Adding global Chow test if there is one + strSummary += _summary_chow(reg) + except: + pass + if other_end: + strSummary += other_end + if vm: + strSummary += _summary_vm(reg) + strSummary += "================================ END OF REPORT =====================================" + return (strSummary, reg) + +def _summary_chow(reg): + sum_text = "\nREGIMES DIAGNOSTICS - CHOW TEST" + name_x_r = reg.name_x_r + joint, regi = reg.chow.joint, reg.chow.regi + sum_text += "\n VARIABLE DF VALUE PROB\n" + if reg.cols2regi == "all": + names_chow = name_x_r[1:] + else: + names_chow = [name_x_r[1:][i] for i in np.where(reg.cols2regi)[0]] + if reg.constant_regi == "many": + names_chow = ["CONSTANT"] + names_chow + + if 'lambda' in reg.output.var_type.values: + if reg.output.var_type.value_counts()['lambda'] > 1: + names_chow += ["lambda"] + + reg.output_chow = pd.DataFrame() + reg.output_chow['var_names'] = names_chow + reg.output_chow['df'] = reg.nr - 1 + reg.output_chow = pd.concat([reg.output_chow, pd.DataFrame(regi, columns=['value', 'prob'])], axis=1) + reg.output_chow = pd.concat([reg.output_chow, pd.DataFrame([{'var_names': 'Global test', + 'df': reg.kr * (reg.nr - 1), + 'value': joint[0], 'prob': joint[1]}])], ignore_index=True) + for row in reg.output_chow.itertuples(): + sum_text += "%25s %2d %12.3f %9.4f\n" % ( + row.var_names, + row.df, + row.value, + row.prob + ) + + return sum_text + + +def _spat_diag_out(reg, w, type, moran=False): + strSummary = "\nDIAGNOSTICS FOR SPATIAL DEPENDENCE\n" + if not moran: + strSummary += ( + "TEST DF VALUE PROB\n" + ) + else: + strSummary += ( + "TEST MI/DF VALUE PROB\n" + ) + + cache = diagnostics_sp.spDcache(reg, w) + if type == "yend": + mi, ak, ak_p = diagnostics_sp.akTest(reg, w, cache) + reg.ak_test = ak, ak_p + strSummary += "%-27s %2d %12.3f %9.4f\n" % ( + "Anselin-Kelejian Test", + 1, + reg.ak_test[0], + reg.ak_test[1], + ) + elif type == "ols": + lm_tests = diagnostics_sp.LMtests(reg, w) + reg.lm_error = lm_tests.lme + reg.lm_lag = lm_tests.lml + reg.rlm_error = lm_tests.rlme + reg.rlm_lag = lm_tests.rlml + reg.lm_sarma = 
lm_tests.sarma + if moran: + moran_res = diagnostics_sp.MoranRes(reg, w, z=True) + reg.moran_res = moran_res.I, moran_res.zI, moran_res.p_norm + strSummary += "%-27s %8.4f %9.3f %9.4f\n" % ( + "Moran's I (error)", + reg.moran_res[0], + reg.moran_res[1], + reg.moran_res[2], + ) + strSummary += "%-27s %2d %12.3f %9.4f\n" % ( + "Lagrange Multiplier (lag)", + 1, + reg.lm_lag[0], + reg.lm_lag[1], + ) + strSummary += "%-27s %2d %12.3f %9.4f\n" % ( + "Robust LM (lag)", + 1, + reg.rlm_lag[0], + reg.rlm_lag[1], + ) + strSummary += "%-27s %2d %12.3f %9.4f\n" % ( + "Lagrange Multiplier (error)", + 1, + reg.lm_error[0], + reg.lm_error[1], + ) + strSummary += "%-27s %2d %12.3f %9.4f\n" % ( + "Robust LM (error)", + 1, + reg.rlm_error[0], + reg.rlm_error[1], + ) + strSummary += "%-27s %2d %12.3f %9.4f\n\n" % ( + "Lagrange Multiplier (SARMA)", + 2, + reg.lm_sarma[0], + reg.lm_sarma[1], + ) + return strSummary + +def _nonspat_top(reg, ml=False): + if not ml: + reg.sig2ML = reg.sig2n + reg.f_stat = diagnostics.f_stat(reg) + reg.logll = diagnostics.log_likelihood(reg) + reg.aic = diagnostics.akaike(reg) + reg.schwarz = diagnostics.schwarz(reg) + + strSummary = "%-20s:%12.6g %-22s:%12.4f\n" % ( + "Sum squared residual", reg.utu, "F-statistic", reg.f_stat[0],) + strSummary += "%-20s:%12.3f %-22s:%12.4g\n" % ( + "Sigma-square", + reg.sig2, + "Prob(F-statistic)", + reg.f_stat[1], + ) + strSummary += "%-20s:%12.3f %-22s:%12.3f\n" % ( + "S.E. of regression", + np.sqrt(reg.sig2), + "Log likelihood", + reg.logll, + ) + strSummary += "%-20s:%12.3f %-22s:%12.3f\n" % ( + "Sigma-square ML", + reg.sig2ML, + "Akaike info criterion", + reg.aic, + ) + strSummary += "%-20s:%12.4f %-22s:%12.3f\n" % ( + "S.E of regression ML", + np.sqrt(reg.sig2ML), + "Schwarz criterion", + reg.schwarz, + ) + else: + strSummary = "%-20s:%12.4f\n" % ( + "Log likelihood", + reg.logll, + ) + strSummary += "%-20s:%12.4f %-22s:%12.3f\n" % ( + "Sigma-square ML", + reg.sig2, + "Akaike info criterion", + reg.aic, + ) + strSummary += "%-20s:%12.4f %-22s:%12.3f\n" % ( + "S.E of regression", + np.sqrt(reg.sig2), + "Schwarz criterion", + reg.schwarz, + ) + + return strSummary + +def _nonspat_mid(reg, white_test=False): + # compute diagnostics + reg.mulColli = diagnostics.condition_index(reg) + reg.jarque_bera = diagnostics.jarque_bera(reg) + reg.breusch_pagan = diagnostics.breusch_pagan(reg) + reg.koenker_bassett = diagnostics.koenker_bassett(reg) + + if white_test: + reg.white = diagnostics.white(reg) + + strSummary = "\nREGRESSION DIAGNOSTICS\n" + if reg.mulColli: + strSummary += "MULTICOLLINEARITY CONDITION NUMBER %16.3f\n\n" % (reg.mulColli) + strSummary += "TEST ON NORMALITY OF ERRORS\n" + strSummary += "TEST DF VALUE PROB\n" + strSummary += "%-27s %2d %14.3f %9.4f\n\n" % ( + "Jarque-Bera", + reg.jarque_bera["df"], + reg.jarque_bera["jb"], + reg.jarque_bera["pvalue"], + ) + strSummary += "DIAGNOSTICS FOR HETEROSKEDASTICITY\n" + strSummary += "RANDOM COEFFICIENTS\n" + strSummary += "TEST DF VALUE PROB\n" + strSummary += "%-27s %2d %12.3f %9.4f\n" % ( + "Breusch-Pagan test", + reg.breusch_pagan["df"], + reg.breusch_pagan["bp"], + reg.breusch_pagan["pvalue"], + ) + strSummary += "%-27s %2d %12.3f %9.4f\n" % ( + "Koenker-Bassett test", + reg.koenker_bassett["df"], + reg.koenker_bassett["kb"], + reg.koenker_bassett["pvalue"], + ) + try: + if reg.white: + strSummary += "\nSPECIFICATION ROBUST TEST\n" + if len(reg.white) > 3: + strSummary += reg.white + "\n" + else: + strSummary += ( + "TEST DF VALUE PROB\n" + ) + strSummary += "%-27s %2d %12.3f %9.4f\n" % 
( + "White", + reg.white["df"], + reg.white["wh"], + reg.white["pvalue"], + ) + except: + pass + return strSummary + +def _spat_pseudo_r2(reg): + if np.abs(reg.rho) < 1: + reg.pr2_e = diagnostics_tsls.pr2_spatial(reg) + strSummary = "%-20s: %5.4f\n" % ("Spatial Pseudo R-squared", reg.pr2_e) + else: + strSummary = "Spatial Pseudo R-squared: omitted due to rho outside the boundary (-1, 1).\n" + return strSummary + +def _summary_vm(reg): + strVM = "\n" + strVM += "COEFFICIENTS VARIANCE MATRIX\n" + strVM += "----------------------------\n" + try: + for name in reg.name_z: + strVM += "%12s" % (name) + except: + for name in reg.name_x: + strVM += "%12s" % (name) + strVM += "\n" + nrow = reg.vm.shape[0] + ncol = reg.vm.shape[1] + for i in range(nrow): + for j in range(ncol): + strVM += "%12.6f" % (reg.vm[i][j]) + strVM += "\n" + return strVM + +def _summary_iteration(reg): + """Reports the number of iterations computed and the type of estimator used for hom and het models.""" + try: + niter = reg.niter + except: + niter = reg.iteration + + txt = "%-20s:%12s\n" % ("N. of iterations", niter) + + try: + if reg.step1c: + step1c = "Yes" + else: + step1c = "No" + txt = txt[:-1] + " %-22s:%12s\n" % ( + "Step1c computed", + step1c, + ) + except: + pass + + try: + txt = txt[:-1] + " %-22s:%12s" % ( + "A1 type: ", + reg.A1, + ) + except: + pass + + return txt diff --git a/spreg/regimes.py b/spreg/regimes.py index fad91d71..d0c4db06 100755 --- a/spreg/regimes.py +++ b/spreg/regimes.py @@ -3,8 +3,6 @@ import scipy.sparse as SP import itertools as iter from scipy.stats import f, chi2 - -chisqprob = chi2.sf import itertools as iter import numpy.linalg as la from .utils import spbroadcast, set_warn @@ -186,6 +184,10 @@ class Regimes_Frame: If 'all' (default), all the variables vary by regime. names : None, list of strings Names of independent variables for use in output + yend : boolean + If True, the last columns of cols2regi are considered for endogenous variables + rlist : boolean + If True, returns a list with the regimes indicators for each variable Returns ------- @@ -212,7 +214,7 @@ class Regimes_Frame: """ - def __init__(self, x, regimes, constant_regi, cols2regi, names=None, yend=False): + def __init__(self, x, regimes, constant_regi, cols2regi, names=None, yend=False, rlist=False): if cols2regi == "all": cols2regi = [True] * x.shape[1] else: @@ -228,8 +230,8 @@ def __init__(self, x, regimes, constant_regi, cols2regi, names=None, yend=False) cols2regi.insert(0, True) else: raise Exception( - "Invalid argument (%s) passed for 'constant_regi'. Please secify a valid term." - % str(constant) + "Invalid argument (%s) passed for 'constant_regi'. Please specify a valid term." 
+ % str(constant_regi) ) try: x = regimeX_setup( @@ -252,12 +254,18 @@ def __init__(self, x, regimes, constant_regi, cols2regi, names=None, yend=False) self.kryd = 0 self.nr = len(set(regimes)) - if names: - names = set_name_x_regimes( - names, regimes, constant_regi, cols2regi, self.regimes_set - ) - - return (x, names) + if rlist: + if names: + names, r_list = set_name_x_regimes( + names, regimes, constant_regi, cols2regi, self.regimes_set, rlist + ) + return (x, names, r_list) + else: + if names: + names = set_name_x_regimes( + names, regimes, constant_regi, cols2regi, self.regimes_set + ) + return (x, names) def wald_test(betas, r, q, vm): @@ -290,7 +298,7 @@ def wald_test(betas, r, q, vm): rvri = la.inv(np.dot(r, np.dot(vm, r.T))) w = np.dot(rbq.T, np.dot(rvri, rbq))[0][0] df = r.shape[0] - pvalue = chisqprob(w, df) + pvalue = chi2.sf(w, df) return w, pvalue @@ -425,7 +433,7 @@ def regimeX_setup(x, regimes, cols2regi, regimes_set, constant=False): return xsp -def set_name_x_regimes(name_x, regimes, constant_regi, cols2regi, regimes_set): +def set_name_x_regimes(name_x, regimes, constant_regi, cols2regi, regimes_set, rlist=False): """ Generate the set of variable names in a regimes setup, according to the order of the betas @@ -475,11 +483,17 @@ def set_name_x_regimes(name_x, regimes, constant_regi, cols2regi, regimes_set): vars_regi = nxa[np.where(c2ra == True)] vars_glob = nxa[np.where(c2ra == False)] name_x_regi = [] + r_list = [] for r in regimes_set: rl = ["%s_%s" % (str(r), i) for i in vars_regi] name_x_regi.extend(rl) + r_list.extend([str(r)] * len(rl)) name_x_regi.extend(["_Global_%s" % i for i in vars_glob]) - return name_x_regi + r_list.extend(["_Global"] * len(vars_glob)) + if rlist: + return (name_x_regi, r_list) + else: + return name_x_regi def w_regime(w, regi_ids, regi_i, transform=True, min_n=None): diff --git a/spreg/twosls.py b/spreg/twosls.py index 6a6bd68f..0fe355b1 100644 --- a/spreg/twosls.py +++ b/spreg/twosls.py @@ -1,11 +1,12 @@ import numpy as np import numpy.linalg as la -from . import summary_output as SUMMARY from . import robust as ROBUST from . import user_output as USER -from .utils import spdot, sphstack, RegressionPropsY, RegressionPropsVM, set_warn +from .utils import spdot, sphstack, RegressionPropsY, RegressionPropsVM, set_warn, get_lags +import pandas as pd +from .output import output, _spat_diag_out -__author__ = "Luc Anselin luc.anselin@asu.edu, David C. Folch david.folch@asu.edu, Jing Yao jingyao@asu.edu" +__author__ = "Luc Anselin lanselin@gmail.com, Pedro Amaral pedrovma@gmail.com, David C. Folch david.folch@asu.edu, Jing Yao jingyao@asu.edu" __all__ = ["TSLS"] @@ -123,10 +124,8 @@ class BaseTSLS(RegressionPropsY, RegressionPropsVM): >>> q.append(db.by_col("DISCBD")) >>> q = np.array(q).T >>> reg = spreg.twosls.BaseTSLS(y, X, yd, q=q) - >>> print(reg.betas) - [[88.46579584] - [ 0.5200379 ] - [-1.58216593]] + >>> print(reg.betas.T) + [[88.46579584 0.5200379 -1.58216593]] >>> reg = spreg.twosls.BaseTSLS(y, X, yd, q=q, robust="white") """ @@ -248,6 +247,9 @@ class TSLS(BaseTSLS): gwk : pysal W object Kernel spatial weights needed for HAC estimation. Note: matrix must have ones along the main diagonal. + slx_lags : integer + Number of spatial lags of X to include in the model specification. + If slx_lags>0, the specification becomes of the SLX type. sig2n_k : boolean If True, then use n-k to estimate sigma^2. If False, use n. 
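Usage sketch (not part of the diff): a minimal illustration of the new slx_lags argument on TSLS and of the pandas-based 'output' attribute introduced by output.py in this patch. The variable roles (CRIME on INC, HOVAL endogenous, DISCBD as instrument) follow this module's own doctest; everything else is an assumption for illustration only.

import numpy as np
import libpysal
from spreg import TSLS

db = libpysal.io.open(libpysal.examples.get_path("columbus.dbf"), "r")
y = np.array(db.by_col("CRIME")).reshape(-1, 1)
x = np.array([db.by_col("INC")]).T
yd = np.array([db.by_col("HOVAL")]).T
q = np.array([db.by_col("DISCBD")]).T
w = libpysal.weights.Rook.from_shapefile(libpysal.examples.get_path("columbus.shp"))
w.transform = "r"
# slx_lags=1 appends the spatial lag of INC to the regressors (2SLS-SLX);
# a weights matrix is then required
reg = TSLS(y, x, yd, q, w=w, slx_lags=1, spat_diag=True,
           name_x=["inc"], name_y="crime", name_yend=["hoval"],
           name_q=["discbd"], name_ds="columbus")
print(reg.output)    # DataFrame with var_names, coefficients, std_err, zt_stat, prob
print(reg.summary)   # formatted report, including the Anselin-Kelejian test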
spat_diag : boolean @@ -269,10 +271,13 @@ class TSLS(BaseTSLS): Name of kernel weights matrix for use in output name_ds : string Name of dataset for use in output - + latex : boolean + Specifies if summary is to be printed in latex format Attributes ---------- + output : dataframe + regression results pandas dataframe summary : string Summary of regression results and diagnostics (note: use in conjunction with the print command) @@ -423,11 +428,8 @@ class TSLS(BaseTSLS): >>> from spreg import TSLS >>> reg = TSLS(y, X, yd, q, name_x=['inc'], name_y='crime', name_yend=['hoval'], name_q=['discbd'], name_ds='columbus') - >>> print(reg.betas) - [[88.46579584] - [ 0.5200379 ] - [-1.58216593]] - + >>> print(reg.betas.T) + [[88.46579584 0.5200379 -1.58216593]] """ def __init__( @@ -439,6 +441,7 @@ def __init__( w=None, robust=None, gwk=None, + slx_lags=0, sig2n_k=False, spat_diag=False, vm=False, @@ -449,14 +452,28 @@ def __init__( name_w=None, name_gwk=None, name_ds=None, + latex=False, ): n = USER.check_arrays(y, x, yend, q) y = USER.check_y(y, n) - USER.check_weights(w, y) USER.check_robust(robust, gwk) + if robust == "hac" and spat_diag: + set_warn( + self, + "Spatial diagnostics are not available for HAC estimation. The spatial diagnostics have been disabled for this model.", + ) + spat_diag = False USER.check_spat_diag(spat_diag, w) x_constant, name_x, warn = USER.check_constant(x, name_x) + self.name_x = USER.set_name_x(name_x, x_constant) + if slx_lags>0: + USER.check_weights(w, y, w_required=True) + lag_x = get_lags(w, x_constant[:, 1:], slx_lags) + x_constant = np.hstack((x_constant, lag_x)) + self.name_x += USER.set_name_spatial_lags(self.name_x[1:], slx_lags) + else: + USER.check_weights(w, y, w_required=False) set_warn(self, warn) BaseTSLS.__init__( self, @@ -469,9 +486,10 @@ def __init__( sig2n_k=sig2n_k, ) self.title = "TWO STAGE LEAST SQUARES" + if slx_lags > 0: + self.title += " WITH SPATIALLY LAGGED X (2SLS-SLX)" self.name_ds = USER.set_name_ds(name_ds) self.name_y = USER.set_name_y(name_y) - self.name_x = USER.set_name_x(name_x, x_constant) self.name_yend = USER.set_name_yend(name_yend, yend) self.name_z = self.name_x + self.name_yend self.name_q = USER.set_name_q(name_q, q) @@ -479,8 +497,15 @@ def __init__( self.robust = USER.set_robust(robust) self.name_w = USER.set_name_w(name_w, w) self.name_gwk = USER.set_name_w(name_gwk, gwk) - SUMMARY.TSLS(reg=self, vm=vm, w=w, spat_diag=spat_diag) - + self.output = pd.DataFrame(self.name_x + self.name_yend, + columns=['var_names']) + self.output['var_type'] = ['x'] * len(self.name_x) + ['yend'] * len(self.name_yend) + self.output['regime'], self.output['equation'] = (0, 0) + if spat_diag: + diag_out = _spat_diag_out(self, w, 'yend') + else: + diag_out = None + output(reg=self, vm=vm, robust=robust, other_end=diag_out, latex=latex) def _test(): import doctest @@ -521,4 +546,5 @@ def _test(): name_ds="columbus", name_w="columbus.gal", ) + print(tsls.output) print(tsls.summary) diff --git a/spreg/twosls_regimes.py b/spreg/twosls_regimes.py index 8c4ddfcb..50a36853 100644 --- a/spreg/twosls_regimes.py +++ b/spreg/twosls_regimes.py @@ -1,19 +1,18 @@ import numpy as np +import multiprocessing as mp +import pandas as pd from . import regimes as REGI from . 
import user_output as USER -import multiprocessing as mp -import scipy.sparse as SP -from .utils import sphstack, set_warn, RegressionProps_basic, spdot, sphstack +from .utils import set_warn, RegressionProps_basic, spdot, sphstack, get_lags from .twosls import BaseTSLS from .robust import hac_multi -from . import summary_output as SUMMARY -from platform import system +from .output import output, _spat_diag_out """ Two-stage Least Squares estimation with regimes. """ -__author__ = "Luc Anselin luc.anselin@asu.edu, Pedro V. Amaral pedro.amaral@asu.edu, David C. Folch david.folch@asu.edu" +__author__ = "Luc Anselin, Pedro V. Amaral, David C. Folch" class TSLS_Regimes(BaseTSLS, REGI.Regimes_Frame): @@ -65,6 +64,10 @@ class TSLS_Regimes(BaseTSLS, REGI.Regimes_Frame): gwk : pysal W object Kernel spatial weights needed for HAC estimation. Note: matrix must have ones along the main diagonal. + slx_lags : integer + Number of spatial lags of X to include in the model specification. + If slx_lags>0, the specification becomes of the SLX type. + Note: WX is computed using the complete weights matrix sig2n_k : boolean If True, then use n-k to estimate sigma^2. If False, use n. vm : boolean @@ -89,9 +92,16 @@ class TSLS_Regimes(BaseTSLS, REGI.Regimes_Frame): Name of kernel weights matrix for use in output name_ds : string Name of dataset for use in output + latex : boolean + Specifies if summary is to be printed in latex format Attributes ---------- + output : dataframe + regression results pandas dataframe + summary : string + Summary of regression results and diagnostics (note: use in + conjunction with the print command) betas : array kx1 array of estimated coefficients u : array @@ -269,6 +279,62 @@ class TSLS_Regimes(BaseTSLS, REGI.Regimes_Frame): array([0.38389901, 0.09963973, 0.04672091, 0.22725012, 0.49181223, 0.19630774, 0.07784587, 0.25529011]) + >>> print(tslsr.summary) + REGRESSION RESULTS + ------------------ + + SUMMARY OF OUTPUT: TWO STAGE LEAST SQUARES ESTIMATION - REGIME 0 + ---------------------------------------------------------------- + Data set : NAT + Weights matrix : NAT.shp + Dependent Variable : 0_HR90 Number of Observations: 1673 + Mean dependent var : 3.3416 Number of Variables : 4 + S.D. dependent var : 4.6795 Degrees of Freedom : 1669 + Pseudo R-squared : 0.2092 + + ------------------------------------------------------------------------------------ + Variable Coefficient Std.Error z-Statistic Probability + ------------------------------------------------------------------------------------ + 0_CONSTANT 3.6697356 0.3838990 9.5591172 0.0000000 + 0_PS90 1.0695047 0.0996397 10.7337170 0.0000000 + 0_UE90 0.1468095 0.0467209 3.1422643 0.0016765 + 0_RD90 2.4586420 0.2272501 10.8191009 0.0000000 + ------------------------------------------------------------------------------------ + Instrumented: 0_RD90 + Instruments: 0_FP89 + Regimes variable: SOUTH + + SUMMARY OF OUTPUT: TWO STAGE LEAST SQUARES ESTIMATION - REGIME 1 + ---------------------------------------------------------------- + Data set : NAT + Weights matrix : NAT.shp + Dependent Variable : 1_HR90 Number of Observations: 1412 + Mean dependent var : 9.5493 Number of Variables : 4 + S.D. 
dependent var : 7.0389 Degrees of Freedom : 1408 + Pseudo R-squared : 0.2987 + + ------------------------------------------------------------------------------------ + Variable Coefficient Std.Error z-Statistic Probability + ------------------------------------------------------------------------------------ + 1_CONSTANT 9.5587324 0.4918122 19.4357356 0.0000000 + 1_PS90 1.9466635 0.1963077 9.9163867 0.0000000 + 1_UE90 -0.3081021 0.0778459 -3.9578483 0.0000756 + 1_RD90 3.6871812 0.2552901 14.4431026 0.0000000 + ------------------------------------------------------------------------------------ + Instrumented: 1_RD90 + Instruments: 1_FP89 + Regimes variable: SOUTH + ------------------------------------------------------------------------------------ + GLOBAL DIAGNOSTICS + + REGIMES DIAGNOSTICS - CHOW TEST + VARIABLE DF VALUE PROB + CONSTANT 1 89.093 0.0000 + PS90 1 15.876 0.0001 + UE90 1 25.106 0.0000 + RD90 1 12.920 0.0003 + Global test 4 201.237 0.0000 + ================================ END OF REPORT ===================================== """ def __init__( @@ -281,6 +347,7 @@ def __init__( w=None, robust=None, gwk=None, + slx_lags=0, sig2n_k=True, spat_diag=False, vm=False, @@ -297,16 +364,36 @@ def __init__( name_gwk=None, name_ds=None, summ=True, + latex=False, ): n = USER.check_arrays(y, x) y = USER.check_y(y, n) - USER.check_weights(w, y) USER.check_robust(robust, gwk) + if robust == "hac": + if regime_err_sep: + set_warn( + self, + "Error by regimes is not available for HAC estimation. The error by regimes has been disabled for this model.", + ) + regime_err_sep = False + if spat_diag: + set_warn( + self, + "Spatial diagnostics are not available for HAC estimation. The spatial diagnostics have been disabled for this model.", + ) + spat_diag = False USER.check_spat_diag(spat_diag, w) x_constant, name_x, warn = USER.check_constant(x, name_x, just_rem=True) set_warn(self, warn) name_x = USER.set_name_x(name_x, x_constant, constant=True) + if slx_lags > 0: + USER.check_weights(w, y, w_required=True) + lag_x = get_lags(w, x_constant, slx_lags) + x_constant = np.hstack((x_constant, lag_x)) + name_x += USER.set_name_spatial_lags(name_x, slx_lags) + else: + USER.check_weights(w, y, w_required=False) self.constant_regi = constant_regi self.cols2regi = cols2regi self.name_ds = USER.set_name_ds(name_ds) @@ -324,12 +411,6 @@ def __init__( self.regimes_set = REGI._get_regimes_set(regimes) self.regimes = regimes USER.check_regimes(self.regimes_set, self.n, x_constant.shape[1]) - if regime_err_sep == True and robust == "hac": - set_warn( - self, - "Error by regimes is incompatible with HAC estimation for 2SLS models. 
Hence, the error by regimes has been disabled for this model.", - ) - regime_err_sep = False self.regime_err_sep = regime_err_sep if ( regime_err_sep == True @@ -348,6 +429,7 @@ def __init__( regi_ids, cores, gwk, + slx_lags, sig2n_k, robust, spat_diag, @@ -355,20 +437,23 @@ def __init__( name_x, name_yend, name_q, + summ, + latex ) else: q, self.name_q = REGI.Regimes_Frame.__init__( self, q, regimes, constant_regi=None, cols2regi="all", names=name_q ) - x, self.name_x = REGI.Regimes_Frame.__init__( + x, self.name_x, x_rlist = REGI.Regimes_Frame.__init__( self, x_constant, regimes, constant_regi, cols2regi=cols2regi, names=name_x, + rlist=True ) - yend, self.name_yend = REGI.Regimes_Frame.__init__( + yend, self.name_yend, yend_rlist = REGI.Regimes_Frame.__init__( self, yend, regimes, @@ -376,13 +461,23 @@ def __init__( cols2regi=cols2regi, yend=True, names=name_yend, + rlist=True ) - if regime_err_sep == True and robust == None: - robust = "white" + self.output = pd.DataFrame(self.name_x+self.name_yend, + columns=['var_names']) + self.output['var_type'] = ['x']*len(self.name_x)+['yend']*len(self.name_yend) + self.output['regime'] = x_rlist+yend_rlist + self.output['equation'] = 0 + BaseTSLS.__init__( self, y=y, x=x, yend=yend, q=q, robust=robust, gwk=gwk, sig2n_k=sig2n_k ) - self.title = "TWO STAGE LEAST SQUARES - REGIMES" + + if slx_lags == 0: + self.title = "TWO STAGE LEAST SQUARES - REGIMES" + else: + self.title = "TWO STAGE LEAST SQUARES WITH SPATIALLY LAGGED X (2SLS-SLX) - REGIMES" + if robust == "ogmm": _optimal_weight(self, sig2n_k) self.name_z = self.name_x + self.name_yend @@ -390,7 +485,12 @@ def __init__( self.chow = REGI.Chow(self) self.robust = USER.set_robust(robust) if summ: - SUMMARY.TSLS(reg=self, vm=vm, w=w, spat_diag=spat_diag, regimes=True) + if spat_diag: + diag_out = _spat_diag_out(self, w, 'yend') + else: + diag_out = None + output(reg=self, vm=vm, robust=robust, other_end=diag_out, latex=latex) + def _tsls_regimes_multi( self, @@ -401,6 +501,7 @@ def _tsls_regimes_multi( regi_ids, cores, gwk, + slx_lags, sig2n_k, robust, spat_diag, @@ -408,6 +509,8 @@ def _tsls_regimes_multi( name_x, name_yend, name_q, + summ, + latex ): results_p = {} """ @@ -421,7 +524,7 @@ def _tsls_regimes_multi( is_win = False """ x_constant, name_x = REGI.check_const_regi(self, x, name_x, regi_ids) - self.name_x_r = name_x + self.name_x_r = name_x + name_yend for r in self.regimes_set: if cores: pool = mp.Pool(None) @@ -444,6 +547,7 @@ def _tsls_regimes_multi( name_q, self.name_w, self.name_regimes, + slx_lags ), ) else: @@ -465,6 +569,7 @@ def _tsls_regimes_multi( name_q, self.name_w, self.name_regimes, + slx_lags ) ) @@ -495,6 +600,7 @@ def _tsls_regimes_multi( self.name_h, ) = ([], [], [], [], [], []) counter = 0 + self.output = pd.DataFrame(columns=['var_names', 'var_type', 'regime', 'equation']) for r in self.regimes_set: """ if is_win: @@ -526,8 +632,13 @@ def _tsls_regimes_multi( self.name_q += results[r].name_q self.name_z += results[r].name_z self.name_h += results[r].name_h + self.output = pd.concat([self.output, pd.DataFrame({'var_names': results[r].name_x+results[r].name_yend, + 'var_type': ['x']*len(results[r].name_x)+['yend']*len(results[r].name_yend), + 'regime': r, 'equation': r})], ignore_index=True) + counter += 1 self.multi = results + self.hac_var = sphstack(x_constant[:, 1:], q) if robust == "hac": hac_multi(self, gwk) @@ -539,9 +650,12 @@ def _tsls_regimes_multi( self.chow = REGI.Chow(self) if spat_diag: self._get_spat_diag_props(results, regi_ids, x_constant, yend, q) - 
SUMMARY.TSLS_multi( - reg=self, multireg=self.multi, vm=vm, spat_diag=spat_diag, regimes=True, w=w - ) + diag_out = _spat_diag_out(self, w, 'yend') + else: + diag_out = None + if summ: + self.output.sort_values(by='regime', inplace=True) + output(reg=self, vm=vm, robust=robust, other_end=diag_out, latex=latex) def _get_spat_diag_props(self, results, regi_ids, x, yend, q): self._cache = {} @@ -578,6 +692,7 @@ def _work( name_q, name_w, name_regimes, + slx_lags, ): y_r = y[regi_ids[r]] x_r = x[regi_ids[r]] @@ -588,7 +703,10 @@ def _work( else: robust2 = robust model = BaseTSLS(y_r, x_r, yend_r, q_r, robust=robust2, sig2n_k=sig2n_k) - model.title = "TWO STAGE LEAST SQUARES ESTIMATION - REGIME %s" % r + if slx_lags == 0: + model.title = "TWO STAGE LEAST SQUARES ESTIMATION - REGIME %s" % r + else: + model.title = "TWO STAGE LEAST SQUARES ESTIMATION WITH SLX - REGIME %s" % r if robust == "ogmm": _optimal_weight(model, sig2n_k, warn=False) model.robust = USER.set_robust(robust) @@ -628,7 +746,7 @@ def _optimal_weight(reg, sig2n_k, warn=True): else: vm = fac2 * reg.n RegressionProps_basic(reg, betas=betas, vm=vm, sig2=False) - reg.title += " (Optimal-Weighted GMM)" + #reg.title += " (Optimal-Weighted GMM)" if warn: set_warn( reg, "Residuals treated as homoskedastic for the purpose of diagnostics." @@ -663,20 +781,31 @@ def _test(): q = np.array([db.by_col(name) for name in q_var]).T r_var = "SOUTH" regimes = db.by_col(r_var) + w = libpysal.weights.Rook.from_shapefile(nat.get_path("natregimes.shp")) + w.transform = "r" tslsr = TSLS_Regimes( y, x, yd, q, regimes, + w = w, constant_regi="many", - spat_diag=False, + spat_diag=True, name_y=y_var, name_x=x_var, name_yend=yd_var, name_q=q_var, name_regimes=r_var, - cols2regi=[False, True, True, True], + #cols2regi=[False, True, True, False], sig2n_k=False, + regime_err_sep = True, + #robust = 'hac', + vm = False ) + print(tslsr.output) print(tslsr.summary) + + + + diff --git a/spreg/twosls_sp.py b/spreg/twosls_sp.py index 47803d69..e5ff8552 100755 --- a/spreg/twosls_sp.py +++ b/spreg/twosls_sp.py @@ -7,8 +7,9 @@ import numpy as np from . import twosls as TSLS from . import user_output as USER -from . import summary_output as SUMMARY from .utils import set_endog, sp_att, set_warn +import pandas as pd +from .output import output, _spat_diag_out, _spat_pseudo_r2 __all__ = ["GM_Lag"] @@ -44,6 +45,9 @@ class BaseGM_Lag(TSLS.BaseTSLS): lag_q : boolean If True, then include spatial lags of the additional instruments (q). + slx_lags : integer + Number of spatial lags of X to include in the model specification. + If slx_lags>0, the specification becomes of the Spatial Durbin type. robust : string If 'white', then a White consistent estimator of the variance-covariance matrix is given. If 'hac', then a @@ -172,15 +176,19 @@ def __init__( q=None, w=None, w_lags=1, + slx_lags=0, lag_q=True, robust=None, gwk=None, sig2n_k=False, ): - yend2, q2 = set_endog( - y, x[:, 1:], w, yend, q, w_lags, lag_q - ) # assumes constant in first column + if slx_lags > 0: + yend2, q2, wx = set_endog(y, x[:, 1:], w, yend, q, w_lags, lag_q, slx_lags) + x = np.hstack((x, wx)) + else: + yend2, q2 = set_endog(y, x[:, 1:], w, yend, q, w_lags, lag_q) + TSLS.BaseTSLS.__init__( self, y=y, x=x, yend=yend2, q=q2, robust=robust, gwk=gwk, sig2n_k=sig2n_k ) @@ -216,6 +224,9 @@ class GM_Lag(BaseGM_Lag): lag_q : boolean If True, then include spatial lags of the additional instruments (q). + slx_lags : integer + Number of spatial lags of X to include in the model specification. 
+ If slx_lags>0, the specification becomes of the Spatial Durbin type. robust : string If 'white', then a White consistent estimator of the variance-covariance matrix is given. If 'hac', then a @@ -245,9 +256,13 @@ class GM_Lag(BaseGM_Lag): Name of kernel weights matrix for use in output name_ds : string Name of dataset for use in output + latex : boolean + Specifies if summary is to be printed in latex format Attributes ---------- + output : dataframe + regression results pandas dataframe summary : string Summary of regression results and diagnostics (note: use in conjunction with the print command) @@ -478,6 +493,7 @@ def __init__( w=None, w_lags=1, lag_q=True, + slx_lags=0, robust=None, gwk=None, sig2n_k=False, @@ -490,13 +506,24 @@ def __init__( name_w=None, name_gwk=None, name_ds=None, + latex=False, ): n = USER.check_arrays(x, yend, q) y = USER.check_y(y, n) USER.check_weights(w, y, w_required=True) USER.check_robust(robust, gwk) + if robust == "hac" and spat_diag: + set_warn( + self, + "Spatial diagnostics are not available for HAC estimation. The spatial diagnostics have been disabled for this model.", + ) + spat_diag = False x_constant, name_x, warn = USER.check_constant(x, name_x) + + if slx_lags > 0: + name_x += USER.set_name_spatial_lags(name_x, slx_lags) + set_warn(self, warn) BaseGM_Lag.__init__( self, @@ -506,6 +533,7 @@ def __init__( yend=yend, q=q, w_lags=w_lags, + slx_lags=slx_lags, robust=robust, gwk=gwk, lag_q=lag_q, @@ -517,20 +545,38 @@ def __init__( ) set_warn(self, warn) self.title = "SPATIAL TWO STAGE LEAST SQUARES" + if slx_lags > 0: + self.title += " WITH SLX (SPATIAL DURBIN MODEL)" self.name_ds = USER.set_name_ds(name_ds) self.name_y = USER.set_name_y(name_y) - self.name_x = USER.set_name_x(name_x, x_constant) + self.name_x = USER.set_name_x(name_x, x_constant) # name_x contains SLX terms for slx_lags > 0 self.name_yend = USER.set_name_yend(name_yend, yend) self.name_yend.append(USER.set_name_yend_sp(self.name_y)) self.name_z = self.name_x + self.name_yend self.name_q = USER.set_name_q(name_q, q) - self.name_q.extend(USER.set_name_q_sp(self.name_x, w_lags, self.name_q, lag_q)) + if slx_lags > 0: # need to remove all but last SLX variables from name_x + self.name_x0 = [] + self.name_x0.append(self.name_x[0]) # constant +# print(f"x0 first {self.name_x0}") + kx = int((self.k -self.kstar -1)/(slx_lags +1) ) # number of original exogenous vars + self.name_x0.extend(self.name_x[-kx:]) +# print(f"in here {self.name_x0}") + self.name_q.extend(USER.set_name_q_sp(self.name_x0, w_lags, self.name_q, lag_q)) + else: + self.name_q.extend(USER.set_name_q_sp(self.name_x, w_lags, self.name_q, lag_q)) self.name_h = USER.set_name_h(self.name_x, self.name_q) self.robust = USER.set_robust(robust) self.name_w = USER.set_name_w(name_w, w) self.name_gwk = USER.set_name_w(name_gwk, gwk) - SUMMARY.GM_Lag(reg=self, w=w, vm=vm, spat_diag=spat_diag) - + self.output = pd.DataFrame(self.name_x + self.name_yend, columns=['var_names']) + self.output['var_type'] = ['x'] * len(self.name_x) + ['yend'] * (len(self.name_yend)-1) + ['rho'] + self.output['regime'], self.output['equation'] = (0, 0) + self.other_top = _spat_pseudo_r2(self) + if spat_diag: + diag_out = _spat_diag_out(self, w, 'yend') + else: + diag_out = None + output(reg=self, vm=vm, robust=robust, other_end=diag_out, latex=latex) def _test(): import doctest @@ -572,4 +618,5 @@ def _test(): name_ds="columbus", name_w="columbus.gal", ) + print(model.output) print(model.summary) diff --git a/spreg/twosls_sp_regimes.py 
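Usage sketch (not part of the diff), following the twosls_sp.py _test() above: with slx_lags>0 GM_Lag estimates a Spatial Durbin specification, with higher-order lags of X used as instruments as implemented in the set_endog/get_lags_split changes to utils.py later in this patch. The columbus variables below are illustrative choices, not taken from the patch.

import numpy as np
import libpysal
from spreg import GM_Lag

db = libpysal.io.open(libpysal.examples.get_path("columbus.dbf"), "r")
y = np.array(db.by_col("HOVAL")).reshape(-1, 1)
x = np.array([db.by_col("INC"), db.by_col("CRIME")]).T
w = libpysal.weights.Rook.from_shapefile(libpysal.examples.get_path("columbus.shp"))
w.transform = "r"
# slx_lags=1 adds W_inc and W_crime as regressors (Spatial Durbin model)
model = GM_Lag(y, x, w=w, slx_lags=1, spat_diag=True,
               name_y="hoval", name_x=["inc", "crime"], name_ds="columbus")
print(model.output)   # the spatial lag of y (W_hoval) appears as the rho coefficient
print(model.summary)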
b/spreg/twosls_sp_regimes.py index cfc0bda7..db0b23c3 100644 --- a/spreg/twosls_sp_regimes.py +++ b/spreg/twosls_sp_regimes.py @@ -7,12 +7,13 @@ import numpy as np from . import regimes as REGI from . import user_output as USER -from . import summary_output as SUMMARY import multiprocessing as mp from .twosls_regimes import TSLS_Regimes, _optimal_weight from .twosls import BaseTSLS from .utils import set_endog, set_endog_sparse, sp_att, set_warn, sphstack, spdot from .robust import hac_multi +import pandas as pd +from .output import output, _spat_diag_out, _spat_pseudo_r2 class GM_Lag_Regimes(TSLS_Regimes, REGI.Regimes_Frame): @@ -62,6 +63,9 @@ class GM_Lag_Regimes(TSLS_Regimes, REGI.Regimes_Frame): lag_q : boolean If True, then include spatial lags of the additional instruments (q). + slx_lags : integer + Number of spatial lags of X to include in the model specification. + If slx_lags>0, the specification becomes of the Spatial Durbin type. regime_lag_sep: boolean If True (default), the spatial parameter for spatial lag is also computed according to different regimes. If False, @@ -107,9 +111,13 @@ class GM_Lag_Regimes(TSLS_Regimes, REGI.Regimes_Frame): Name of dataset for use in output name_regimes : string Name of regimes variable for use in output + latex : boolean + Specifies if summary is to be printed in latex format Attributes ---------- + output : dataframe + regression results pandas dataframe summary : string Summary of regression results and diagnostics (note: use in conjunction with the print command) @@ -378,15 +386,16 @@ class GM_Lag_Regimes(TSLS_Regimes, REGI.Regimes_Frame): models, the argument regime_lag_sep must be set to True: >>> model=GM_Lag_Regimes(y, x, regimes, w=w, regime_lag_sep=True, name_y=y_var, name_x=x_var, name_regimes=r_var, name_ds='NAT', name_w='NAT.shp') - >>> print(np.hstack((np.array(model.name_z).reshape(8,1),model.betas,np.sqrt(model.vm.diagonal().reshape(8,1))))) - [['0_CONSTANT' '1.3658476998618099' '0.3985472089832652'] - ['0_PS90' '0.8087573074246643' '0.11324884794883601'] - ['0_UE90' '0.5694681319188577' '0.04625087717092595'] - ['0_W_HR90' '-0.43424389464634316' '0.13350159258670305'] - ['1_CONSTANT' '7.90731073341874' '1.6360187416950998'] - ['1_PS90' '1.2746570332609135' '0.2470987049452741'] - ['1_UE90' '0.6016769336173784' '0.07993322102145078'] - ['1_W_HR90' '-0.2960338343846942' '0.19934459782427025']] + >>> print(model.output) + var_names coefficients std_err zt_stat prob + 0 0_CONSTANT 1.365848 0.398547 3.427066 0.00061 + 1 0_PS90 0.808757 0.113249 7.141418 0.0 + 2 0_UE90 0.569468 0.046251 12.312591 0.0 + 3 0_W_HR90 -0.434244 0.133502 -3.252724 0.001143 + 4 1_CONSTANT 7.907311 1.636019 4.833264 0.000001 + 5 1_PS90 1.274657 0.247099 5.158493 0.0 + 6 1_UE90 0.601677 0.079933 7.527245 0.0 + 7 1_W_HR90 -0.296034 0.199345 -1.485036 0.137534 Alternatively, we can type: 'model.summary' to see the organized results output. 
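Usage sketch (not part of the diff): because 'output' is a plain pandas DataFrame, the regime-specific estimates shown above can be filtered or exported with standard pandas tools; the file name below is only a placeholder.

# keep only the regime-1 coefficients from the table printed above
print(model.output[model.output.var_names.str.startswith("1_")])
# export the full table, e.g. for a report (placeholder file name)
model.output.to_csv("gm_lag_regimes_results.csv", index=False)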
The class is flexible enough to accomodate a spatial lag model that, @@ -432,6 +441,7 @@ def __init__( q=None, w=None, w_lags=1, + slx_lags=0, lag_q=True, robust=None, gwk=None, @@ -439,7 +449,7 @@ def __init__( spat_diag=False, constant_regi="many", cols2regi="all", - regime_lag_sep=False, + regime_lag_sep=True, regime_err_sep=True, cores=False, vm=False, @@ -451,12 +461,32 @@ def __init__( name_w=None, name_gwk=None, name_ds=None, + latex=False, ): n = USER.check_arrays(y, x) y = USER.check_y(y, n) USER.check_weights(w, y, w_required=True) USER.check_robust(robust, gwk) + if regime_lag_sep and not regime_err_sep: + set_warn(self, "regime_err_sep set to True when regime_lag_sep=True.") + regime_err_sep = True + if regime_err_sep and not regime_lag_sep: + set_warn(self, "Groupwise heteroskedasticity is not currently available for this method,\n so regime_err_sep has been set to False.") + regime_err_sep = False + if robust == "hac": + if regime_err_sep: + set_warn( + self, + "Error by regimes is not available for HAC estimation. The error by regimes has been disabled for this model.", + ) + regime_err_sep = False + if spat_diag: + set_warn( + self, + "Spatial diagnostics are not available for HAC estimation. The spatial diagnostics have been disabled for this model.", + ) + spat_diag = False USER.check_spat_diag(spat_diag, w) x_constant, name_x, warn = USER.check_constant(x, name_x, just_rem=True) set_warn(self, warn) @@ -464,12 +494,20 @@ def __init__( name_y = USER.set_name_y(name_y) name_yend = USER.set_name_yend(name_yend, yend) name_q = USER.set_name_q(name_q, q) - name_q.extend(USER.set_name_q_sp(name_x, w_lags, name_q, lag_q, force_all=True)) self.name_regimes = USER.set_name_ds(name_regimes) self.constant_regi = constant_regi - self.n = n + if slx_lags > 0: + yend2, q2, wx = set_endog(y, x_constant, w, yend, q, w_lags, lag_q, slx_lags) + x_constant = np.hstack((x_constant, wx)) + name_slx = USER.set_name_spatial_lags(name_x, slx_lags) + name_q.extend(USER.set_name_q_sp(name_slx[-len(name_x):], w_lags, name_q, lag_q, force_all=True)) + name_x += name_slx + else: + name_q.extend(USER.set_name_q_sp(name_x, w_lags, name_q, lag_q, force_all=True)) + yend2, q2 = yend, q + self.n = x_constant.shape[0] cols2regi = REGI.check_cols2regi( - constant_regi, cols2regi, x_constant, yend=yend, add_cons=False + constant_regi, cols2regi, x_constant, yend=yend2, add_cons=False ) self.cols2regi = cols2regi self.regimes_set = REGI._get_regimes_set(regimes) @@ -485,8 +523,6 @@ def __init__( self.regime_err_sep = regime_err_sep self.regime_lag_sep = regime_lag_sep if regime_lag_sep == True: - if not regime_err_sep: - raise Exception("regime_err_sep must be True when regime_lag_sep=True.") cols2regi += [True] w_i, regi_ids, warn = REGI.w_regimes( w, @@ -513,9 +549,10 @@ def __init__( w_i, w, regi_ids, - yend=yend, - q=q, + yend=yend2, + q=q2, w_lags=w_lags, + slx_lags=slx_lags, lag_q=lag_q, cores=cores, robust=robust, @@ -532,11 +569,13 @@ def __init__( name_w=name_w, name_gwk=name_gwk, name_ds=name_ds, + latex=latex, ) else: if regime_lag_sep == True: w = REGI.w_regimes_union(w, w_i, self.regimes_set) - yend2, q2 = set_endog(y, x_constant, w, yend, q, w_lags, lag_q) + if slx_lags == 0: + yend2, q2 = set_endog(y, x_constant, w, yend2, q2, w_lags, lag_q) name_yend.append(USER.set_name_yend_sp(name_y)) TSLS_Regimes.__init__( self, @@ -564,17 +603,26 @@ def __init__( name_ds=name_ds, summ=False, ) - if regime_lag_sep: + + if regime_lag_sep: # not currently available. 
self.sp_att_reg(w_i, regi_ids, yend2[:, -1].reshape(self.n, 1)) else: self.rho = self.betas[-1] + self.output.iat[-1, self.output.columns.get_loc('var_type')] = 'rho' self.predy_e, self.e_pred, warn = sp_att( w, self.y, self.predy, yend2[:, -1].reshape(self.n, 1), self.rho ) set_warn(self, warn) self.regime_lag_sep = regime_lag_sep self.title = "SPATIAL " + self.title - SUMMARY.GM_Lag(reg=self, w=w, vm=vm, spat_diag=spat_diag, regimes=True) + if slx_lags > 0: + self.title = " SPATIAL 2SLS WITH SLX (SPATIAL DURBIN MODEL) - REGIMES" + self.other_top = _spat_pseudo_r2(self) + if spat_diag: + diag_out = _spat_diag_out(self, w, 'yend') + else: + diag_out = None + output(reg=self, vm=vm, robust=robust, other_end=diag_out, latex=latex) def GM_Lag_Regimes_Multi( self, @@ -587,6 +635,7 @@ def GM_Lag_Regimes_Multi( yend=None, q=None, w_lags=1, + slx_lags=0, lag_q=True, robust=None, gwk=None, @@ -602,6 +651,7 @@ def GM_Lag_Regimes_Multi( name_w=None, name_gwk=None, name_ds=None, + latex=False, ): # pool = mp.Pool(cores) self.name_ds = USER.set_name_ds(name_ds) @@ -636,6 +686,7 @@ def GM_Lag_Regimes_Multi( q, w_r, w_lags, + slx_lags, lag_q, robust, sig2n_k, @@ -659,6 +710,7 @@ def GM_Lag_Regimes_Multi( q, w_r, w_lags, + slx_lags, lag_q, robust, sig2n_k, @@ -702,6 +754,7 @@ def GM_Lag_Regimes_Multi( self.name_h, ) = ([], [], [], [], [], []) counter = 0 + self.output = pd.DataFrame(columns=['var_names', 'var_type', 'regime', 'equation']) for r in self.regimes_set: """ if is_win: @@ -752,6 +805,14 @@ def GM_Lag_Regimes_Multi( self.hac_var[ regi_ids[r], ] = results[r].h + results[r].other_top = _spat_pseudo_r2(results[r]) + results[r].other_mid = "" + if spat_diag: + results[r].other_mid += _spat_diag_out(results[r], results[r].w, 'yend') + self.output = pd.concat([self.output, pd.DataFrame({'var_names': results[r].name_x + results[r].name_yend, + 'var_type': ['x'] * len(results[r].name_x) + [ + 'yend'] * (len(results[r].name_yend)-1) + ['rho'], + 'regime': r, 'equation': r})], ignore_index=True) counter += 1 self.multi = results if robust == "hac": @@ -763,11 +824,9 @@ def GM_Lag_Regimes_Multi( ) self.chow = REGI.Chow(self) if spat_diag: - pass # self._get_spat_diag_props(y, x, w, yend, q, w_lags, lag_q) - SUMMARY.GM_Lag_multi( - reg=self, multireg=self.multi, vm=vm, spat_diag=spat_diag, regimes=True, w=w - ) + pass + output(reg=self, vm=vm, robust=robust, other_end=False, latex=latex) def sp_att_reg(self, w_i, regi_ids, wy): predy_e_r, e_pred_r = {}, {} @@ -823,6 +882,7 @@ def _work( q, w_r, w_lags, + slx_lags, lag_q, robust, sig2n_k, @@ -844,14 +904,18 @@ def _work( q_r = q[regi_ids[r]] else: q_r = q - yend_r, q_r = set_endog_sparse(y_r, x_r[:, 1:], w_r, yend_r, q_r, w_lags, lag_q) + if slx_lags == 0: + yend_r, q_r = set_endog_sparse(y_r, x_r[:, 1:], w_r, yend_r, q_r, w_lags, lag_q) + title = "SPATIAL TWO STAGE LEAST SQUARES ESTIMATION - REGIME %s" % r + else: + title = "SPATIAL 2SLS WITH SLX (SPATIAL DURBIN MODEL) - REGIME %s" % r # x_constant = USER.check_constant(x_r) if robust == "hac" or robust == "ogmm": robust2 = None else: robust2 = robust model = BaseTSLS(y_r, x_r, yend_r, q_r, robust=robust2, sig2n_k=sig2n_k) - model.title = "SPATIAL TWO STAGE LEAST SQUARES ESTIMATION - REGIME %s" % r + model.title = title if robust == "ogmm": _optimal_weight(model, sig2n_k, warn=False) model.rho = model.betas[-1] @@ -917,6 +981,8 @@ def _test(): name_ds="columbus", name_w="columbus.gal", regime_err_sep=True, + regime_lag_sep = True, robust="white", ) + print(model.output) print(model.summary) diff --git 
a/spreg/user_output.py b/spreg/user_output.py index ffdb2324..cf7f9e68 100755 --- a/spreg/user_output.py +++ b/spreg/user_output.py @@ -1,7 +1,8 @@ """Internal helper files for user output.""" __author__ = ( - "Luc Anselin luc.anselin@asu.edu, " + "Luc Anselin lanselin@gmail.com, " + "Pedro Amaral pedrovma@gmail.com" "David C. Folch david.folch@asu.edu, " "Levi John Wolf levi.john.wolf@gmail.com, " "Jing Yao jingyao@asu.edu" @@ -147,6 +148,23 @@ def set_name_yend_sp(name_y): """ return "W_" + name_y +def set_name_spatial_lags(names, w_lags): + """Set the spatial lag names for multiple variables and lag orders" + + Parameters + ---------- + names : string + Original variables' names. + + Returns + ------- + lag_names : string + + """ + lag_names = ["W_" + s for s in names] + for i in range(w_lags-1): + lag_names += ["W" + str(i+2) + "_" + s for s in names] + return lag_names def set_name_q_sp(name_x, w_lags, name_q, lag_q, force_all=False): """Set the spatial instrument names in regression; return generic name if user @@ -544,8 +562,9 @@ def check_robust(robust, wk): # NOTE: we are not checking for the case of exactly 1.0 ### raise Exception("Off-diagonal entries must be less than 1.") elif robust.lower() == "white" or robust.lower() == "ogmm": - if wk: - raise Exception("White requires that wk be set to None") + # if wk: + # raise Exception("White requires that wk be set to None") + pass # these options are not affected by wk else: raise Exception( "invalid value passed to robust, see docs for valid options" diff --git a/spreg/utils.py b/spreg/utils.py index ff4f6d2c..c369c74d 100755 --- a/spreg/utils.py +++ b/spreg/utils.py @@ -479,7 +479,7 @@ def get_lags(w, x, w_lags): Returns -------- rs : array - nxk*(w_lags+1) array with original and spatially lagged variables + nxk*(w_lags) array with spatially lagged variables """ lag = lag_spatial(w, x) @@ -489,6 +489,43 @@ def get_lags(w, x, w_lags): spat_lags = sphstack(spat_lags, lag) return spat_lags +def get_lags_split(w, x, max_lags, split_at): + """ + Calculates a given order of spatial lags and all the smaller orders, + separated into two groups (up to split_at and above) + + Parameters + ---------- + w : weight + PySAL weights instance + x : array + nxk arrays with the variables to be lagged + max_lags : integer + Maximum order of spatial lag + split_at: integer + Separates the resulting lags into two groups: up to split_at and above + + Returns + -------- + rs_l,rs_h: tuple of arrays + rs_l: nxk*(split_at) array with spatially lagged variables up to split_at + rs_h: nxk*(w_lags-split_at) array with spatially lagged variables above split_at + + """ + rs_l = lag = lag_spatial(w, x) + rs_h = None + if 0 < split_at < max_lags: + for _ in range(split_at-1): + lag = lag_spatial(w, lag) + rs_l = sphstack(rs_l, lag) + + for i in range(max_lags - split_at): + lag = lag_spatial(w, lag) + rs_h = sphstack(rs_h, lag) if i > 0 else lag + else: + raise ValueError("max_lags must be greater than split_at and split_at must be greater than 0") + + return rs_l, rs_h def inverse_prod( w, @@ -571,9 +608,11 @@ def inverse_prod( except: matrix = la.inv(np.eye(w.shape[0]) - (scalar * w)) if post_multiply: - inv_prod = spdot(data.T, matrix) +# inv_prod = spdot(data.T, matrix) + inv_prod = np.matmul(data.T,matrix) # inverse matrix is dense, wrong type in spdot else: - inv_prod = spdot(matrix, data) +# inv_prod = spdot(matrix, data) + inv_prod = np.matmul(matrix,data) else: raise Exception("Invalid method selected for inversion.") return inv_prod @@ -623,24 +662,35 
@@ def power_expansion( return running_total -def set_endog(y, x, w, yend, q, w_lags, lag_q): +def set_endog(y, x, w, yend, q, w_lags, lag_q, slx_lags=0): # Create spatial lag of y yl = lag_spatial(w, y) # spatial and non-spatial instruments if issubclass(type(yend), np.ndarray): + if slx_lags > 0: + lag_x, lag_xq = get_lags_split(w, x, slx_lags+1, slx_lags) + else: + lag_xq = x if lag_q: - lag_vars = sphstack(x, q) + lag_vars = sphstack(lag_xq, q) else: - lag_vars = x + lag_vars = lag_xq spatial_inst = get_lags(w, lag_vars, w_lags) q = sphstack(q, spatial_inst) yend = sphstack(yend, yl) elif yend == None: # spatial instruments only - q = get_lags(w, x, w_lags) + if slx_lags > 0: + lag_x, lag_xq = get_lags_split(w, x, slx_lags+w_lags, slx_lags) + else: + lag_xq = get_lags(w, x, w_lags) + q = lag_xq yend = yl else: raise Exception("invalid value passed to yend") - return yend, q + if slx_lags == 0: + return yend, q + else: + return yend, q, lag_x lag = lag_spatial(w, x) spat_lags = lag From 0fb086f325f1197100c2bb6a88436712e10bad04 Mon Sep 17 00:00:00 2001 From: Pedro Amaral Date: Wed, 13 Sep 2023 17:13:00 -0300 Subject: [PATCH 2/6] Fixing non-SUR test failures --- spreg/ml_error.py | 2 +- spreg/tests/test_error_sp.py | 431 ++++++-------- spreg/tests/test_error_sp_sparse.py | 438 ++++++--------- spreg/tests/test_twosls_sp_regimes.py | 774 ++++++++++---------------- 4 files changed, 627 insertions(+), 1018 deletions(-) diff --git a/spreg/ml_error.py b/spreg/ml_error.py index bb338677..b6913d25 100644 --- a/spreg/ml_error.py +++ b/spreg/ml_error.py @@ -501,7 +501,7 @@ def __init__( ) self.name_ds = USER.set_name_ds(name_ds) self.name_y = USER.set_name_y(name_y) - self.name_x = USER.set_name_x(name_x, x) + self.name_x = USER.set_name_x(name_x, x_constant) self.name_x.append("lambda") self.name_w = USER.set_name_w(name_w, w) self.aic = DIAG.akaike(reg=self) diff --git a/spreg/tests/test_error_sp.py b/spreg/tests/test_error_sp.py index 610d25bf..c87999af 100644 --- a/spreg/tests/test_error_sp.py +++ b/spreg/tests/test_error_sp.py @@ -6,129 +6,104 @@ from spreg import utils from libpysal.common import RTOL - class TestBaseGMError(unittest.TestCase): def setUp(self): - db = libpysal.io.open(libpysal.examples.get_path("columbus.dbf"), "r") + db=libpysal.io.open(libpysal.examples.get_path("columbus.dbf"),"r") y = np.array(db.by_col("HOVAL")) - self.y = np.reshape(y, (49, 1)) + self.y = np.reshape(y, (49,1)) X = [] X.append(db.by_col("INC")) X.append(db.by_col("CRIME")) self.X = np.array(X).T - self.X = np.hstack((np.ones(self.y.shape), self.X)) - self.w = libpysal.weights.Rook.from_shapefile( - libpysal.examples.get_path("columbus.shp") - ) - self.w.transform = "r" + self.X = np.hstack((np.ones(self.y.shape),self.X)) + self.w = libpysal.weights.Rook.from_shapefile(libpysal.examples.get_path("columbus.shp")) + self.w.transform = 'r' def test_model(self): reg = SP.BaseGM_Error(self.y, self.X, self.w.sparse) - betas = np.array([[47.94371455], [0.70598088], [-0.55571746], [0.37230161]]) - np.testing.assert_allclose(reg.betas, betas, RTOL) - u = np.array([27.4739775]) - np.testing.assert_allclose(reg.u[0], u, RTOL) - predy = np.array([52.9930255]) - np.testing.assert_allclose(reg.predy[0], predy, RTOL) + betas = np.array([[ 47.94371455], [ 0.70598088], [ -0.55571746], [ 0.37230161]]) + np.testing.assert_allclose(reg.betas,betas,RTOL) + u = np.array([ 27.4739775]) + np.testing.assert_allclose(reg.u[0],u,RTOL) + predy = np.array([ 52.9930255]) + np.testing.assert_allclose(reg.predy[0],predy,RTOL) n = 49 - 
np.testing.assert_allclose(reg.n, n, RTOL) + np.testing.assert_allclose(reg.n,n,RTOL) k = 3 - np.testing.assert_allclose(reg.k, k, RTOL) - y = np.array([80.467003]) - np.testing.assert_allclose(reg.y[0], y, RTOL) - x = np.array([1.0, 19.531, 15.72598]) - np.testing.assert_allclose(reg.x[0], x, RTOL) - e = np.array([31.89620319]) - np.testing.assert_allclose(reg.e_filtered[0], e, RTOL) - predy = np.array([52.9930255]) - np.testing.assert_allclose(reg.predy[0], predy, RTOL) + np.testing.assert_allclose(reg.k,k,RTOL) + y = np.array([ 80.467003]) + np.testing.assert_allclose(reg.y[0],y,RTOL) + x = np.array([ 1. , 19.531 , 15.72598]) + np.testing.assert_allclose(reg.x[0],x,RTOL) + e = np.array([ 31.89620319]) + np.testing.assert_allclose(reg.e_filtered[0],e,RTOL) + predy = np.array([ 52.9930255]) + np.testing.assert_allclose(reg.predy[0],predy,RTOL) my = 38.43622446938776 - np.testing.assert_allclose(reg.mean_y, my, RTOL) + np.testing.assert_allclose(reg.mean_y,my,RTOL) sy = 18.466069465206047 - np.testing.assert_allclose(reg.std_y, sy, RTOL) - vm = np.array( - [ - [1.51884943e02, -5.37622793e00, -1.86970286e00], - [-5.37622793e00, 2.48972661e-01, 5.26564244e-02], - [-1.86970286e00, 5.26564244e-02, 3.18930650e-02], - ] - ) - np.testing.assert_allclose(reg.vm, vm, RTOL) + np.testing.assert_allclose(reg.std_y,sy,RTOL) + vm = np.array([[ 1.51884943e+02, -5.37622793e+00, -1.86970286e+00], [ -5.37622793e+00, 2.48972661e-01, 5.26564244e-02], [ -1.86970286e+00, 5.26564244e-02, 3.18930650e-02]]) + np.testing.assert_allclose(reg.vm,vm,RTOL) sig2 = 191.73716465732355 - np.testing.assert_allclose(reg.sig2, sig2, RTOL) - + np.testing.assert_allclose(reg.sig2,sig2,RTOL) class TestGMError(unittest.TestCase): def setUp(self): - db = libpysal.io.open(libpysal.examples.get_path("columbus.dbf"), "r") + db=libpysal.io.open(libpysal.examples.get_path("columbus.dbf"),"r") y = np.array(db.by_col("HOVAL")) - self.y = np.reshape(y, (49, 1)) + self.y = np.reshape(y, (49,1)) X = [] X.append(db.by_col("INC")) X.append(db.by_col("CRIME")) self.X = np.array(X).T - self.w = libpysal.weights.Rook.from_shapefile( - libpysal.examples.get_path("columbus.shp") - ) - self.w.transform = "r" + self.w = libpysal.weights.Rook.from_shapefile(libpysal.examples.get_path("columbus.shp")) + self.w.transform = 'r' def test_model(self): reg = SP.GM_Error(self.y, self.X, self.w) - betas = np.array([[47.94371455], [0.70598088], [-0.55571746], [0.37230161]]) - np.testing.assert_allclose(reg.betas, betas, RTOL) - u = np.array([27.4739775]) - np.testing.assert_allclose(reg.u[0], u, RTOL) - predy = np.array([52.9930255]) - np.testing.assert_allclose(reg.predy[0], predy, RTOL) + betas = np.array([[ 47.94371455], [ 0.70598088], [ -0.55571746], [ 0.37230161]]) + np.testing.assert_allclose(reg.betas,betas,RTOL) + u = np.array([ 27.4739775]) + np.testing.assert_allclose(reg.u[0],u,RTOL) + predy = np.array([ 52.9930255]) + np.testing.assert_allclose(reg.predy[0],predy,RTOL) n = 49 - np.testing.assert_allclose(reg.n, n, RTOL) + np.testing.assert_allclose(reg.n,n,RTOL) k = 3 - np.testing.assert_allclose(reg.k, k, RTOL) - y = np.array([80.467003]) - np.testing.assert_allclose(reg.y[0], y, RTOL) - x = np.array([1.0, 19.531, 15.72598]) - np.testing.assert_allclose(reg.x[0], x, RTOL) - e = np.array([31.89620319]) - np.testing.assert_allclose(reg.e_filtered[0], e, RTOL) - predy = np.array([52.9930255]) - np.testing.assert_allclose(reg.predy[0], predy, RTOL) + np.testing.assert_allclose(reg.k,k,RTOL) + y = np.array([ 80.467003]) + 
np.testing.assert_allclose(reg.y[0],y,RTOL) + x = np.array([ 1. , 19.531 , 15.72598]) + np.testing.assert_allclose(reg.x[0],x,RTOL) + e = np.array([ 31.89620319]) + np.testing.assert_allclose(reg.e_filtered[0],e,RTOL) + predy = np.array([ 52.9930255]) + np.testing.assert_allclose(reg.predy[0],predy,RTOL) my = 38.43622446938776 - np.testing.assert_allclose(reg.mean_y, my, RTOL) + np.testing.assert_allclose(reg.mean_y,my,RTOL) sy = 18.466069465206047 - np.testing.assert_allclose(reg.std_y, sy, RTOL) - vm = np.array( - [ - [1.51884943e02, -5.37622793e00, -1.86970286e00], - [-5.37622793e00, 2.48972661e-01, 5.26564244e-02], - [-1.86970286e00, 5.26564244e-02, 3.18930650e-02], - ] - ) - np.testing.assert_allclose(reg.vm, vm, RTOL) + np.testing.assert_allclose(reg.std_y,sy,RTOL) + vm = np.array([[ 1.51884943e+02, -5.37622793e+00, -1.86970286e+00], [ -5.37622793e+00, 2.48972661e-01, 5.26564244e-02], [ -1.86970286e+00, 5.26564244e-02, 3.18930650e-02]]) + np.testing.assert_allclose(reg.vm,vm,RTOL) sig2 = 191.73716465732355 - np.testing.assert_allclose(reg.sig2, sig2, RTOL) + np.testing.assert_allclose(reg.sig2,sig2,RTOL) pr2 = 0.3495097406012179 - np.testing.assert_allclose(reg.pr2, pr2) - std_err = np.array([12.32416094, 0.4989716, 0.1785863]) - np.testing.assert_allclose(reg.std_err, std_err, RTOL) - z_stat = np.array( - [ - [3.89022140e00, 1.00152805e-04], - [1.41487186e00, 1.57106070e-01], - [-3.11175868e00, 1.85976455e-03], - ] - ) - np.testing.assert_allclose(reg.z_stat, z_stat, RTOL) - + np.testing.assert_allclose(reg.pr2,pr2) + std_err = np.array([ 12.32416094, 0.4989716 , 0.1785863 ]) + np.testing.assert_allclose(reg.std_err,std_err,RTOL) + z_stat = np.array([[ 3.89022140e+00, 1.00152805e-04], [ 1.41487186e+00, 1.57106070e-01], [ -3.11175868e+00, 1.85976455e-03]]) + np.testing.assert_allclose(reg.z_stat,z_stat,RTOL) class TestBaseGMEndogError(unittest.TestCase): def setUp(self): - db = libpysal.io.open(libpysal.examples.get_path("columbus.dbf"), "r") + db=libpysal.io.open(libpysal.examples.get_path("columbus.dbf"),"r") y = np.array(db.by_col("HOVAL")) - self.y = np.reshape(y, (49, 1)) + self.y = np.reshape(y, (49,1)) X = [] X.append(db.by_col("INC")) self.X = np.array(X).T - self.X = np.hstack((np.ones(self.y.shape), self.X)) + self.X = np.hstack((np.ones(self.y.shape),self.X)) yd = [] yd.append(db.by_col("CRIME")) self.yd = np.array(yd).T @@ -136,55 +111,50 @@ def setUp(self): q.append(db.by_col("DISCBD")) self.q = np.array(q).T self.w = weights.Rook.from_shapefile(libpysal.examples.get_path("columbus.shp")) - self.w.transform = "r" + self.w.transform = 'r' def test_model(self): reg = SP.BaseGM_Endog_Error(self.y, self.X, self.yd, self.q, self.w.sparse) - betas = np.array([[55.36095292], [0.46411479], [-0.66883535], [0.38989939]]) - # raising a warning causes a failure... - print("Running reduced precision test in L120 of test_error_sp.py") - np.testing.assert_allclose(reg.betas, betas, RTOL + 0.0001) - u = np.array([26.55951566]) - np.testing.assert_allclose(reg.u[0], u, RTOL) - e = np.array([31.23925425]) - np.testing.assert_allclose(reg.e_filtered[0], e, RTOL) - predy = np.array([53.9074875]) - np.testing.assert_allclose(reg.predy[0], predy, RTOL) + betas = np.array([[ 55.36095292], [ 0.46411479], [ -0.66883535], [ 0.38989939]]) + #raising a warning causes a failure... 
+ print('Running reduced precision test in L120 of test_error_sp.py') + np.testing.assert_allclose(reg.betas,betas,RTOL+.0001) + u = np.array([ 26.55951566]) + np.testing.assert_allclose(reg.u[0],u,RTOL) + e = np.array([ 31.23925425]) + np.testing.assert_allclose(reg.e_filtered[0],e,RTOL) + predy = np.array([ 53.9074875]) + np.testing.assert_allclose(reg.predy[0],predy,RTOL) n = 49 - np.testing.assert_allclose(reg.n, n, RTOL) + np.testing.assert_allclose(reg.n,n,RTOL) k = 3 - np.testing.assert_allclose(reg.k, k, RTOL) - y = np.array([80.467003]) - np.testing.assert_allclose(reg.y[0], y, RTOL) - x = np.array([1.0, 19.531]) - np.testing.assert_allclose(reg.x[0], x, RTOL) - yend = np.array([15.72598]) - np.testing.assert_allclose(reg.yend[0], yend, RTOL) - z = np.array([1.0, 19.531, 15.72598]) - np.testing.assert_allclose(reg.z[0], z, RTOL) + np.testing.assert_allclose(reg.k,k,RTOL) + y = np.array([ 80.467003]) + np.testing.assert_allclose(reg.y[0],y,RTOL) + x = np.array([ 1. , 19.531]) + np.testing.assert_allclose(reg.x[0],x,RTOL) + yend = np.array([ 15.72598]) + np.testing.assert_allclose(reg.yend[0],yend,RTOL) + z = np.array([ 1. , 19.531 , 15.72598]) + np.testing.assert_allclose(reg.z[0],z,RTOL) my = 38.43622446938776 - np.testing.assert_allclose(reg.mean_y, my, RTOL) - # std_y + np.testing.assert_allclose(reg.mean_y,my,RTOL) + #std_y sy = 18.466069465206047 - np.testing.assert_allclose(reg.std_y, sy, RTOL) - # vm - vm = np.array( - [ - [5.29158422e02, -1.57833675e01, -8.38021080e00], - [-1.57833675e01, 5.40235041e-01, 2.31120327e-01], - [-8.38021080e00, 2.31120327e-01, 1.44977385e-01], - ] - ) - np.testing.assert_allclose(reg.vm, vm, RTOL) + np.testing.assert_allclose(reg.std_y,sy,RTOL) + #vm + vm = np.array([[ 5.29158422e+02, -1.57833675e+01, -8.38021080e+00], + [ -1.57833675e+01, 5.40235041e-01, 2.31120327e-01], + [ -8.38021080e+00, 2.31120327e-01, 1.44977385e-01]]) + np.testing.assert_allclose(reg.vm,vm,RTOL) sig2 = 192.50022721929574 - np.testing.assert_allclose(reg.sig2, sig2, RTOL) - + np.testing.assert_allclose(reg.sig2,sig2,RTOL) class TestGMEndogError(unittest.TestCase): def setUp(self): - db = libpysal.io.open(libpysal.examples.get_path("columbus.dbf"), "r") + db=libpysal.io.open(libpysal.examples.get_path("columbus.dbf"),"r") y = np.array(db.by_col("HOVAL")) - self.y = np.reshape(y, (49, 1)) + self.y = np.reshape(y, (49,1)) X = [] X.append(db.by_col("INC")) self.X = np.array(X).T @@ -194,180 +164,105 @@ def setUp(self): q = [] q.append(db.by_col("DISCBD")) self.q = np.array(q).T - self.w = libpysal.weights.Rook.from_shapefile( - libpysal.examples.get_path("columbus.shp") - ) - self.w.transform = "r" + self.w = libpysal.weights.Rook.from_shapefile(libpysal.examples.get_path("columbus.shp")) + self.w.transform = 'r' def test_model(self): reg = SP.GM_Endog_Error(self.y, self.X, self.yd, self.q, self.w) - betas = np.array([[55.36095292], [0.46411479], [-0.66883535], [0.38989939]]) - print("Running reduced precision test in L175 of test_error_sp.py") - np.testing.assert_allclose(reg.betas, betas, RTOL + 0.0001) - u = np.array([26.55951566]) - np.testing.assert_allclose(reg.u[0], u, RTOL) - e = np.array([31.23925425]) - np.testing.assert_allclose(reg.e_filtered[0], e, RTOL) - predy = np.array([53.9074875]) - np.testing.assert_allclose(reg.predy[0], predy, RTOL) + betas = np.array([[ 55.36095292], [ 0.46411479], [ -0.66883535], [ 0.38989939]]) + print('Running reduced precision test in L175 of test_error_sp.py') + np.testing.assert_allclose(reg.betas,betas,RTOL +.0001) + u = 
np.array([ 26.55951566]) + np.testing.assert_allclose(reg.u[0],u,RTOL) + e = np.array([ 31.23925425]) + np.testing.assert_allclose(reg.e_filtered[0],e,RTOL) + predy = np.array([ 53.9074875]) + np.testing.assert_allclose(reg.predy[0],predy,RTOL) n = 49 - np.testing.assert_allclose(reg.n, n, RTOL) + np.testing.assert_allclose(reg.n,n,RTOL) k = 3 - np.testing.assert_allclose(reg.k, k, RTOL) - y = np.array([80.467003]) - np.testing.assert_allclose(reg.y[0], y, RTOL) - x = np.array([1.0, 19.531]) - np.testing.assert_allclose(reg.x[0], x, RTOL) - yend = np.array([15.72598]) - np.testing.assert_allclose(reg.yend[0], yend, RTOL) - z = np.array([1.0, 19.531, 15.72598]) - np.testing.assert_allclose(reg.z[0], z, RTOL) + np.testing.assert_allclose(reg.k,k,RTOL) + y = np.array([ 80.467003]) + np.testing.assert_allclose(reg.y[0],y,RTOL) + x = np.array([ 1. , 19.531]) + np.testing.assert_allclose(reg.x[0],x,RTOL) + yend = np.array([ 15.72598]) + np.testing.assert_allclose(reg.yend[0],yend,RTOL) + z = np.array([ 1. , 19.531 , 15.72598]) + np.testing.assert_allclose(reg.z[0],z,RTOL) my = 38.43622446938776 - np.testing.assert_allclose(reg.mean_y, my, RTOL) + np.testing.assert_allclose(reg.mean_y,my,RTOL) sy = 18.466069465206047 - np.testing.assert_allclose(reg.std_y, sy, RTOL) - vm = np.array( - [ - [5.29158422e02, -1.57833675e01, -8.38021080e00], - [-1.57833675e01, 5.40235041e-01, 2.31120327e-01], - [-8.38021080e00, 2.31120327e-01, 1.44977385e-01], - ] - ) - np.testing.assert_allclose(reg.vm, vm, RTOL) + np.testing.assert_allclose(reg.std_y,sy,RTOL) + vm = np.array([[ 5.29158422e+02, -1.57833675e+01, -8.38021080e+00], + [ -1.57833675e+01, 5.40235041e-01, 2.31120327e-01], + [ -8.38021080e+00, 2.31120327e-01, 1.44977385e-01]]) + np.testing.assert_allclose(reg.vm,vm,RTOL) pr2 = 0.346472557570858 - np.testing.assert_allclose(reg.pr2, pr2, RTOL) + np.testing.assert_allclose(reg.pr2,pr2,RTOL) sig2 = 192.50022721929574 - np.testing.assert_allclose(reg.sig2, sig2, RTOL) - std_err = np.array([23.003401, 0.73500657, 0.38075777]) - np.testing.assert_allclose(reg.std_err, std_err, RTOL) - z_stat = np.array( - [ - [2.40664208, 0.01609994], - [0.63144305, 0.52775088], - [-1.75659016, 0.07898769], - ] - ) - np.testing.assert_allclose(reg.z_stat, z_stat, RTOL) - - -class TestBaseGMCombo(unittest.TestCase): - def setUp(self): - db = libpysal.io.open(libpysal.examples.get_path("columbus.dbf"), "r") - y = np.array(db.by_col("HOVAL")) - self.y = np.reshape(y, (49, 1)) - X = [] - X.append(db.by_col("INC")) - X.append(db.by_col("CRIME")) - self.X = np.array(X).T - self.w = libpysal.weights.Rook.from_shapefile( - libpysal.examples.get_path("columbus.shp") - ) - self.w.transform = "r" - - def test_model(self): - # Only spatial lag - yd2, q2 = utils.set_endog(self.y, self.X, self.w, None, None, 1, True) - self.X = np.hstack((np.ones(self.y.shape), self.X)) - reg = SP.BaseGM_Combo(self.y, self.X, yend=yd2, q=q2, w=self.w.sparse) - betas = np.array( - [[57.61123461], [0.73441314], [-0.59459416], [-0.21762921], [0.54732051]] - ) - np.testing.assert_allclose(reg.betas, betas, RTOL) - u = np.array([25.57932637]) - np.testing.assert_allclose(reg.u[0], u, RTOL) - e_filtered = np.array([31.65374945]) - np.testing.assert_allclose(reg.e_filtered[0], e_filtered, RTOL) - predy = np.array([54.88767663]) - np.testing.assert_allclose(reg.predy[0], predy, RTOL) - n = 49 - np.testing.assert_allclose(reg.n, n, RTOL) - k = 4 - np.testing.assert_allclose(reg.k, k, RTOL) - y = np.array([80.467003]) - np.testing.assert_allclose(reg.y[0], y, RTOL) - x = 
np.array([1.0, 19.531, 15.72598]) - np.testing.assert_allclose(reg.x[0], x, RTOL) - yend = np.array([35.4585005]) - np.testing.assert_allclose(reg.yend[0], yend, RTOL) - z = np.array([1.0, 19.531, 15.72598, 35.4585005]) - np.testing.assert_allclose(reg.z[0], z, RTOL) - my = 38.43622446938776 - np.testing.assert_allclose(reg.mean_y, my, RTOL) - sy = 18.466069465206047 - np.testing.assert_allclose(reg.std_y, sy, RTOL) - vm = np.array([5.22438365e02, 2.38012873e-01, 3.20924172e-02, 2.15753599e-01]) - np.testing.assert_allclose(np.diag(reg.vm), vm, RTOL) - sig2 = 181.78650186468832 - np.testing.assert_allclose(reg.sig2, sig2, RTOL) + np.testing.assert_allclose(reg.sig2,sig2,RTOL) + std_err = np.array([ 23.003401 , 0.73500657, 0.38075777]) + np.testing.assert_allclose(reg.std_err,std_err,RTOL) + z_stat = np.array([[ 2.40664208, 0.01609994], [ 0.63144305, 0.52775088], [-1.75659016, 0.07898769]]) + np.testing.assert_allclose(reg.z_stat,z_stat,RTOL) class TestGMCombo(unittest.TestCase): def setUp(self): - db = libpysal.io.open(libpysal.examples.get_path("columbus.dbf"), "r") + db=libpysal.io.open(libpysal.examples.get_path("columbus.dbf"),"r") y = np.array(db.by_col("HOVAL")) - self.y = np.reshape(y, (49, 1)) + self.y = np.reshape(y, (49,1)) X = [] X.append(db.by_col("INC")) X.append(db.by_col("CRIME")) self.X = np.array(X).T - self.w = libpysal.weights.Rook.from_shapefile( - libpysal.examples.get_path("columbus.shp") - ) - self.w.transform = "r" - + self.w = libpysal.weights.Rook.from_shapefile(libpysal.examples.get_path("columbus.shp")) + self.w.transform = 'r' def test_model(self): # Only spatial lag reg = SP.GM_Combo(self.y, self.X, w=self.w) - e_reduced = np.array([28.18617481]) - np.testing.assert_allclose(reg.e_pred[0], e_reduced, RTOL) - predy_e = np.array([52.28082782]) - np.testing.assert_allclose(reg.predy_e[0], predy_e, RTOL) - betas = np.array( - [[57.61123515], [0.73441313], [-0.59459416], [-0.21762921], [0.54732051]] - ) - np.testing.assert_allclose(reg.betas, betas, RTOL) - u = np.array([25.57932637]) - np.testing.assert_allclose(reg.u[0], u, RTOL) - e_filtered = np.array([31.65374945]) - np.testing.assert_allclose(reg.e_filtered[0], e_filtered, RTOL) - predy = np.array([54.88767685]) - np.testing.assert_allclose(reg.predy[0], predy, RTOL) + e_reduced = np.array([ 28.18617481]) + np.testing.assert_allclose(reg.e_pred[0],e_reduced,RTOL) + predy_e = np.array([ 52.28082782]) + np.testing.assert_allclose(reg.predy_e[0],predy_e,RTOL) + betas = np.array([[ 57.61123515],[ 0.73441313], [ -0.59459416], [ -0.21762921], [ 0.54732051]]) + np.testing.assert_allclose(reg.betas,betas,RTOL) + u = np.array([ 25.57932637]) + np.testing.assert_allclose(reg.u[0],u,RTOL) + e_filtered = np.array([ 31.65374945]) + np.testing.assert_allclose(reg.e_filtered[0],e_filtered,RTOL) + predy = np.array([ 54.88767685]) + np.testing.assert_allclose(reg.predy[0],predy,RTOL) n = 49 - np.testing.assert_allclose(reg.n, n, RTOL) + np.testing.assert_allclose(reg.n,n,RTOL) k = 4 - np.testing.assert_allclose(reg.k, k, RTOL) - y = np.array([80.467003]) - np.testing.assert_allclose(reg.y[0], y, RTOL) - x = np.array([1.0, 19.531, 15.72598]) - np.testing.assert_allclose(reg.x[0], x, RTOL) - yend = np.array([35.4585005]) - np.testing.assert_allclose(reg.yend[0], yend, RTOL) - z = np.array([1.0, 19.531, 15.72598, 35.4585005]) - np.testing.assert_allclose(reg.z[0], z, RTOL) + np.testing.assert_allclose(reg.k,k,RTOL) + y = np.array([ 80.467003]) + np.testing.assert_allclose(reg.y[0],y,RTOL) + x = np.array([ 1. 
, 19.531 , 15.72598]) + np.testing.assert_allclose(reg.x[0],x,RTOL) + yend = np.array([ 35.4585005]) + np.testing.assert_allclose(reg.yend[0],yend,RTOL) + z = np.array([ 1. , 19.531 , 15.72598 , 35.4585005]) + np.testing.assert_allclose(reg.z[0],z,RTOL) my = 38.43622446938776 - np.testing.assert_allclose(reg.mean_y, my) + np.testing.assert_allclose(reg.mean_y,my) sy = 18.466069465206047 - np.testing.assert_allclose(reg.std_y, sy) - vm = np.array([5.22438333e02, 2.38012875e-01, 3.20924173e-02, 2.15753579e-01]) - np.testing.assert_allclose(np.diag(reg.vm), vm, RTOL) + np.testing.assert_allclose(reg.std_y,sy) + vm = np.array([ 5.22438333e+02, 2.38012875e-01, 3.20924173e-02, + 2.15753579e-01]) + np.testing.assert_allclose(np.diag(reg.vm),vm,RTOL) sig2 = 181.78650186468832 - np.testing.assert_allclose(reg.sig2, sig2, RTOL) + np.testing.assert_allclose(reg.sig2,sig2,RTOL) pr2 = 0.3018280166937799 - np.testing.assert_allclose(reg.pr2, pr2, RTOL) + np.testing.assert_allclose(reg.pr2,pr2,RTOL) pr2_e = 0.3561355586759414 - np.testing.assert_allclose(reg.pr2_e, pr2_e, RTOL) - std_err = np.array([22.85692222, 0.48786559, 0.17914356, 0.46449318]) - np.testing.assert_allclose(reg.std_err, std_err, RTOL) - z_stat = np.array( - [ - [2.52051597e00, 1.17182922e-02], - [1.50535954e00, 1.32231664e-01], - [-3.31909311e00, 9.03103123e-04], - [-4.68530506e-01, 6.39405261e-01], - ] - ) - np.testing.assert_allclose(reg.z_stat, z_stat, RTOL) - + np.testing.assert_allclose(reg.pr2_e,pr2_e,RTOL) + std_err = np.array([ 22.85692222, 0.48786559, 0.17914356, 0.46449318]) + np.testing.assert_allclose(reg.std_err,std_err,RTOL) + z_stat = np.array([[ 2.52051597e+00, 1.17182922e-02], [ 1.50535954e+00, 1.32231664e-01], [ -3.31909311e+00, 9.03103123e-04], [ -4.68530506e-01, 6.39405261e-01]]) + np.testing.assert_allclose(reg.z_stat,z_stat,RTOL) -if __name__ == "__main__": +if __name__ == '__main__': unittest.main() diff --git a/spreg/tests/test_error_sp_sparse.py b/spreg/tests/test_error_sp_sparse.py index 07fd942a..4fad34cc 100755 --- a/spreg/tests/test_error_sp_sparse.py +++ b/spreg/tests/test_error_sp_sparse.py @@ -5,133 +5,108 @@ from scipy import sparse from spreg import utils from spreg import error_sp as SP -from libpysal.common import RTOL - +from libpysal.common import RTOL class TestBaseGMError(unittest.TestCase): def setUp(self): - db = libpysal.io.open(libpysal.examples.get_path("columbus.dbf"), "r") + db=libpysal.io.open(libpysal.examples.get_path("columbus.dbf"),"r") y = np.array(db.by_col("HOVAL")) - self.y = np.reshape(y, (49, 1)) + self.y = np.reshape(y, (49,1)) X = [] X.append(db.by_col("INC")) X.append(db.by_col("CRIME")) self.X = np.array(X).T - self.X = np.hstack((np.ones(self.y.shape), self.X)) + self.X = np.hstack((np.ones(self.y.shape),self.X)) self.X = sparse.csr_matrix(self.X) - self.w = libpysal.weights.Rook.from_shapefile( - libpysal.examples.get_path("columbus.shp") - ) - self.w.transform = "r" + self.w = libpysal.weights.Rook.from_shapefile(libpysal.examples.get_path("columbus.shp")) + self.w.transform = 'r' def test_model(self): reg = SP.BaseGM_Error(self.y, self.X, self.w.sparse) - betas = np.array([[47.94371455], [0.70598088], [-0.55571746], [0.37230161]]) - np.testing.assert_allclose(reg.betas, betas, RTOL) - u = np.array([27.4739775]) - np.testing.assert_allclose(reg.u[0], u, RTOL) - predy = np.array([52.9930255]) - np.testing.assert_allclose(reg.predy[0], predy, RTOL) + betas = np.array([[ 47.94371455], [ 0.70598088], [ -0.55571746], [ 0.37230161]]) + 
np.testing.assert_allclose(reg.betas,betas,RTOL) + u = np.array([ 27.4739775]) + np.testing.assert_allclose(reg.u[0],u,RTOL) + predy = np.array([ 52.9930255]) + np.testing.assert_allclose(reg.predy[0],predy,RTOL) n = 49 - np.testing.assert_allclose(reg.n, n, RTOL) + np.testing.assert_allclose(reg.n,n,RTOL) k = 3 - np.testing.assert_allclose(reg.k, k, RTOL) - y = np.array([80.467003]) - np.testing.assert_allclose(reg.y[0], y, RTOL) - x = np.array([1.0, 19.531, 15.72598]) - np.testing.assert_allclose(reg.x.toarray()[0], x, RTOL) - e = np.array([31.89620319]) - np.testing.assert_allclose(reg.e_filtered[0], e, RTOL) - predy = np.array([52.9930255]) - np.testing.assert_allclose(reg.predy[0], predy, RTOL) + np.testing.assert_allclose(reg.k,k,RTOL) + y = np.array([ 80.467003]) + np.testing.assert_allclose(reg.y[0],y,RTOL) + x = np.array([ 1. , 19.531 , 15.72598]) + np.testing.assert_allclose(reg.x.toarray()[0],x,RTOL) + e = np.array([ 31.89620319]) + np.testing.assert_allclose(reg.e_filtered[0],e,RTOL) + predy = np.array([ 52.9930255]) + np.testing.assert_allclose(reg.predy[0],predy,RTOL) my = 38.43622446938776 - np.testing.assert_allclose(reg.mean_y, my, RTOL) + np.testing.assert_allclose(reg.mean_y,my,RTOL) sy = 18.466069465206047 - np.testing.assert_allclose(reg.std_y, sy, RTOL) - vm = np.array( - [ - [1.51884943e02, -5.37622793e00, -1.86970286e00], - [-5.37622793e00, 2.48972661e-01, 5.26564244e-02], - [-1.86970286e00, 5.26564244e-02, 3.18930650e-02], - ] - ) - np.testing.assert_allclose(reg.vm, vm, RTOL) + np.testing.assert_allclose(reg.std_y,sy,RTOL) + vm = np.array([[ 1.51884943e+02, -5.37622793e+00, -1.86970286e+00], [ -5.37622793e+00, 2.48972661e-01, 5.26564244e-02], [ -1.86970286e+00, 5.26564244e-02, 3.18930650e-02]]) + np.testing.assert_allclose(reg.vm,vm,RTOL) sig2 = 191.73716465732355 - np.testing.assert_allclose(reg.sig2, sig2, RTOL) - + np.testing.assert_allclose(reg.sig2,sig2,RTOL) class TestGMError(unittest.TestCase): def setUp(self): - db = libpysal.io.open(libpysal.examples.get_path("columbus.dbf"), "r") + db=libpysal.io.open(libpysal.examples.get_path("columbus.dbf"),"r") y = np.array(db.by_col("HOVAL")) - self.y = np.reshape(y, (49, 1)) + self.y = np.reshape(y, (49,1)) X = [] X.append(db.by_col("INC")) X.append(db.by_col("CRIME")) self.X = np.array(X).T self.X = sparse.csr_matrix(self.X) - self.w = libpysal.weights.Rook.from_shapefile( - libpysal.examples.get_path("columbus.shp") - ) - self.w.transform = "r" + self.w = libpysal.weights.Rook.from_shapefile(libpysal.examples.get_path("columbus.shp")) + self.w.transform = 'r' def test_model(self): reg = SP.GM_Error(self.y, self.X, self.w) - betas = np.array([[47.94371455], [0.70598088], [-0.55571746], [0.37230161]]) - np.testing.assert_allclose(reg.betas, betas, RTOL) - u = np.array([27.4739775]) - np.testing.assert_allclose(reg.u[0], u, RTOL) - predy = np.array([52.9930255]) - np.testing.assert_allclose(reg.predy[0], predy, RTOL) + betas = np.array([[ 47.94371455], [ 0.70598088], [ -0.55571746], [ 0.37230161]]) + np.testing.assert_allclose(reg.betas,betas,RTOL) + u = np.array([ 27.4739775]) + np.testing.assert_allclose(reg.u[0],u,RTOL) + predy = np.array([ 52.9930255]) + np.testing.assert_allclose(reg.predy[0],predy,RTOL) n = 49 - np.testing.assert_allclose(reg.n, n, RTOL) + np.testing.assert_allclose(reg.n,n,RTOL) k = 3 - np.testing.assert_allclose(reg.k, k, RTOL) - y = np.array([80.467003]) - np.testing.assert_allclose(reg.y[0], y, RTOL) - x = np.array([1.0, 19.531, 15.72598]) - np.testing.assert_allclose(reg.x.toarray()[0], 
x, RTOL) - e = np.array([31.89620319]) - np.testing.assert_allclose(reg.e_filtered[0], e, RTOL) - predy = np.array([52.9930255]) - np.testing.assert_allclose(reg.predy[0], predy, RTOL) + np.testing.assert_allclose(reg.k,k,RTOL) + y = np.array([ 80.467003]) + np.testing.assert_allclose(reg.y[0],y,RTOL) + x = np.array([ 1. , 19.531 , 15.72598]) + np.testing.assert_allclose(reg.x.toarray()[0],x,RTOL) + e = np.array([ 31.89620319]) + np.testing.assert_allclose(reg.e_filtered[0],e,RTOL) + predy = np.array([ 52.9930255]) + np.testing.assert_allclose(reg.predy[0],predy,RTOL) my = 38.43622446938776 - np.testing.assert_allclose(reg.mean_y, my, RTOL) + np.testing.assert_allclose(reg.mean_y,my,RTOL) sy = 18.466069465206047 - np.testing.assert_allclose(reg.std_y, sy, RTOL) - vm = np.array( - [ - [1.51884943e02, -5.37622793e00, -1.86970286e00], - [-5.37622793e00, 2.48972661e-01, 5.26564244e-02], - [-1.86970286e00, 5.26564244e-02, 3.18930650e-02], - ] - ) - np.testing.assert_allclose(reg.vm, vm, RTOL) + np.testing.assert_allclose(reg.std_y,sy,RTOL) + vm = np.array([[ 1.51884943e+02, -5.37622793e+00, -1.86970286e+00], [ -5.37622793e+00, 2.48972661e-01, 5.26564244e-02], [ -1.86970286e+00, 5.26564244e-02, 3.18930650e-02]]) + np.testing.assert_allclose(reg.vm,vm,RTOL) sig2 = 191.73716465732355 - np.testing.assert_allclose(reg.sig2, sig2, RTOL) + np.testing.assert_allclose(reg.sig2,sig2,RTOL) pr2 = 0.3495097406012179 - np.testing.assert_allclose(reg.pr2, pr2, RTOL) - std_err = np.array([12.32416094, 0.4989716, 0.1785863]) - np.testing.assert_allclose(reg.std_err, std_err, RTOL) - z_stat = np.array( - [ - [3.89022140e00, 1.00152805e-04], - [1.41487186e00, 1.57106070e-01], - [-3.11175868e00, 1.85976455e-03], - ] - ) - np.testing.assert_allclose(reg.z_stat, z_stat, RTOL) - + np.testing.assert_allclose(reg.pr2,pr2,RTOL) + std_err = np.array([ 12.32416094, 0.4989716 , 0.1785863 ]) + np.testing.assert_allclose(reg.std_err,std_err,RTOL) + z_stat = np.array([[ 3.89022140e+00, 1.00152805e-04], [ 1.41487186e+00, 1.57106070e-01], [ -3.11175868e+00, 1.85976455e-03]]) + np.testing.assert_allclose(reg.z_stat,z_stat,RTOL) class TestBaseGMEndogError(unittest.TestCase): def setUp(self): - db = libpysal.io.open(libpysal.examples.get_path("columbus.dbf"), "r") + db=libpysal.io.open(libpysal.examples.get_path("columbus.dbf"),"r") y = np.array(db.by_col("HOVAL")) - self.y = np.reshape(y, (49, 1)) + self.y = np.reshape(y, (49,1)) X = [] X.append(db.by_col("INC")) self.X = np.array(X).T - self.X = np.hstack((np.ones(self.y.shape), self.X)) + self.X = np.hstack((np.ones(self.y.shape),self.X)) self.X = sparse.csr_matrix(self.X) yd = [] yd.append(db.by_col("CRIME")) @@ -139,57 +114,50 @@ def setUp(self): q = [] q.append(db.by_col("DISCBD")) self.q = np.array(q).T - self.w = libpysal.weights.Rook.from_shapefile( - libpysal.examples.get_path("columbus.shp") - ) - self.w.transform = "r" + self.w = libpysal.weights.Rook.from_shapefile(libpysal.examples.get_path("columbus.shp")) + self.w.transform = 'r' def test_model(self): reg = SP.BaseGM_Endog_Error(self.y, self.X, self.yd, self.q, self.w.sparse) - betas = np.array([[55.36095292], [0.46411479], [-0.66883535], [0.38989939]]) - print("Running reduced-precision test in L125 of test_error_sp_sparse.py") - np.testing.assert_allclose(reg.betas, betas, RTOL + 0.0001) - u = np.array([26.55951566]) - np.testing.assert_allclose(reg.u[0], u, RTOL) - e = np.array([31.23925425]) - np.testing.assert_allclose(reg.e_filtered[0], e, RTOL) - predy = np.array([53.9074875]) - 
np.testing.assert_allclose(reg.predy[0], predy, RTOL) + betas = np.array([[ 55.36095292], [ 0.46411479], [ -0.66883535], [ 0.38989939]]) + print('Running reduced-precision test in L125 of test_error_sp_sparse.py') + np.testing.assert_allclose(reg.betas,betas,RTOL + .0001) + u = np.array([ 26.55951566]) + np.testing.assert_allclose(reg.u[0],u,RTOL) + e = np.array([ 31.23925425]) + np.testing.assert_allclose(reg.e_filtered[0],e,RTOL) + predy = np.array([ 53.9074875]) + np.testing.assert_allclose(reg.predy[0],predy,RTOL) n = 49 - np.testing.assert_allclose(reg.n, n, RTOL) + np.testing.assert_allclose(reg.n,n,RTOL) k = 3 - np.testing.assert_allclose(reg.k, k, RTOL) - y = np.array([80.467003]) - np.testing.assert_allclose(reg.y[0], y, RTOL) - x = np.array([1.0, 19.531]) - np.testing.assert_allclose(reg.x.toarray()[0], x, RTOL) - yend = np.array([15.72598]) - np.testing.assert_allclose(reg.yend[0], yend, RTOL) - z = np.array([1.0, 19.531, 15.72598]) - np.testing.assert_allclose(reg.z.toarray()[0], z, RTOL) + np.testing.assert_allclose(reg.k,k,RTOL) + y = np.array([ 80.467003]) + np.testing.assert_allclose(reg.y[0],y,RTOL) + x = np.array([ 1. , 19.531]) + np.testing.assert_allclose(reg.x.toarray()[0],x,RTOL) + yend = np.array([ 15.72598]) + np.testing.assert_allclose(reg.yend[0],yend,RTOL) + z = np.array([ 1. , 19.531 , 15.72598]) + np.testing.assert_allclose(reg.z.toarray()[0],z,RTOL) my = 38.43622446938776 - np.testing.assert_allclose(reg.mean_y, my, RTOL) - # std_y + np.testing.assert_allclose(reg.mean_y,my,RTOL) + #std_y sy = 18.466069465206047 - np.testing.assert_allclose(reg.std_y, sy, RTOL) - # vm - vm = np.array( - [ - [529.15840986, -15.78336736, -8.38021053], - [-15.78336736, 0.54023504, 0.23112032], - [-8.38021053, 0.23112032, 0.14497738], - ] - ) - np.testing.assert_allclose(reg.vm, vm, RTOL) + np.testing.assert_allclose(reg.std_y,sy,RTOL) + #vm + vm = np.array([[ 529.15840986, -15.78336736, -8.38021053], + [ -15.78336736, 0.54023504, 0.23112032], + [ -8.38021053, 0.23112032, 0.14497738]]) + np.testing.assert_allclose(reg.vm,vm,RTOL) sig2 = 192.5002 - np.testing.assert_allclose(reg.sig2, sig2, RTOL) - + np.testing.assert_allclose(reg.sig2,sig2,RTOL) class TestGMEndogError(unittest.TestCase): def setUp(self): - db = libpysal.io.open(libpysal.examples.get_path("columbus.dbf"), "r") + db=libpysal.io.open(libpysal.examples.get_path("columbus.dbf"),"r") y = np.array(db.by_col("HOVAL")) - self.y = np.reshape(y, (49, 1)) + self.y = np.reshape(y, (49,1)) X = [] X.append(db.by_col("INC")) self.X = np.array(X).T @@ -200,185 +168,109 @@ def setUp(self): q = [] q.append(db.by_col("DISCBD")) self.q = np.array(q).T - self.w = libpysal.weights.Rook.from_shapefile( - libpysal.examples.get_path("columbus.shp") - ) - self.w.transform = "r" + self.w = libpysal.weights.Rook.from_shapefile(libpysal.examples.get_path("columbus.shp")) + self.w.transform = 'r' def test_model(self): reg = SP.GM_Endog_Error(self.y, self.X, self.yd, self.q, self.w) - betas = np.array([[55.36095292], [0.46411479], [-0.66883535], [0.38989939]]) - print("Running reduced-tolernace test in L181 of test_error_sp_sparse.py") - np.testing.assert_allclose(reg.betas, betas, RTOL + 0.0001) - u = np.array([26.55951566]) - np.testing.assert_allclose(reg.u[0], u, RTOL) - e = np.array([31.23925425]) - np.testing.assert_allclose(reg.e_filtered[0], e, RTOL) - predy = np.array([53.9074875]) - np.testing.assert_allclose(reg.predy[0], predy, RTOL) + betas = np.array([[ 55.36095292], [ 0.46411479], [ -0.66883535], [ 0.38989939]]) + print('Running 
reduced-tolernace test in L181 of test_error_sp_sparse.py') + np.testing.assert_allclose(reg.betas,betas,RTOL + .0001) + u = np.array([ 26.55951566]) + np.testing.assert_allclose(reg.u[0],u,RTOL) + e = np.array([ 31.23925425]) + np.testing.assert_allclose(reg.e_filtered[0],e,RTOL) + predy = np.array([ 53.9074875]) + np.testing.assert_allclose(reg.predy[0],predy,RTOL) n = 49 - np.testing.assert_allclose(reg.n, n, RTOL) + np.testing.assert_allclose(reg.n,n,RTOL) k = 3 - np.testing.assert_allclose(reg.k, k, RTOL) - y = np.array([80.467003]) - np.testing.assert_allclose(reg.y[0], y, RTOL) - x = np.array([1.0, 19.531]) - np.testing.assert_allclose(reg.x.toarray()[0], x, RTOL) - yend = np.array([15.72598]) - np.testing.assert_allclose(reg.yend[0], yend, RTOL) - z = np.array([1.0, 19.531, 15.72598]) - np.testing.assert_allclose(reg.z.toarray()[0], z, RTOL) + np.testing.assert_allclose(reg.k,k,RTOL) + y = np.array([ 80.467003]) + np.testing.assert_allclose(reg.y[0],y,RTOL) + x = np.array([ 1. , 19.531]) + np.testing.assert_allclose(reg.x.toarray()[0],x,RTOL) + yend = np.array([ 15.72598]) + np.testing.assert_allclose(reg.yend[0],yend,RTOL) + z = np.array([ 1. , 19.531 , 15.72598]) + np.testing.assert_allclose(reg.z.toarray()[0],z,RTOL) my = 38.43622446938776 - np.testing.assert_allclose(reg.mean_y, my, RTOL) + np.testing.assert_allclose(reg.mean_y,my,RTOL) sy = 18.466069465206047 - np.testing.assert_allclose(reg.std_y, sy, RTOL) - vm = np.array( - [ - [529.15840986, -15.78336736, -8.38021053], - [-15.78336736, 0.54023504, 0.23112032], - [-8.38021053, 0.23112032, 0.14497738], - ] - ) - np.testing.assert_allclose(reg.vm, vm, RTOL) + np.testing.assert_allclose(reg.std_y,sy,RTOL) + vm = np.array([[ 529.15840986, -15.78336736, -8.38021053], + [ -15.78336736, 0.54023504, 0.23112032], + [ -8.38021053, 0.23112032, 0.14497738]]) + np.testing.assert_allclose(reg.vm,vm,RTOL) pr2 = 0.346472557570858 - np.testing.assert_allclose(reg.pr2, pr2, RTOL) + np.testing.assert_allclose(reg.pr2,pr2,RTOL) sig2 = 192.5002 - np.testing.assert_allclose(reg.sig2, sig2, RTOL) - std_err = np.array([23.003401, 0.73500657, 0.38075777]) - np.testing.assert_allclose(reg.std_err, std_err, RTOL) - z_stat = np.array( - [ - [2.40664208, 0.01609994], - [0.63144305, 0.52775088], - [-1.75659016, 0.07898769], - ] - ) - np.testing.assert_allclose(reg.z_stat, z_stat, RTOL) - - -class TestBaseGMCombo(unittest.TestCase): - def setUp(self): - db = libpysal.io.open(libpysal.examples.get_path("columbus.dbf"), "r") - y = np.array(db.by_col("HOVAL")) - self.y = np.reshape(y, (49, 1)) - X = [] - X.append(db.by_col("INC")) - X.append(db.by_col("CRIME")) - self.X = np.array(X).T - self.w = libpysal.weights.Rook.from_shapefile( - libpysal.examples.get_path("columbus.shp") - ) - self.w.transform = "r" - - def test_model(self): - # Only spatial lag - yd2, q2 = utils.set_endog(self.y, self.X, self.w, None, None, 1, True) - self.X = np.hstack((np.ones(self.y.shape), self.X)) - self.X = sparse.csr_matrix(self.X) - reg = SP.BaseGM_Combo(self.y, self.X, yend=yd2, q=q2, w=self.w.sparse) - betas = np.array( - [[57.61123461], [0.73441314], [-0.59459416], [-0.21762921], [0.54732051]] - ) - np.testing.assert_allclose(reg.betas, betas, RTOL) - u = np.array([25.57932637]) - np.testing.assert_allclose(reg.u[0], u, RTOL) - e_filtered = np.array([31.65374945]) - np.testing.assert_allclose(reg.e_filtered[0], e_filtered, RTOL) - predy = np.array([54.88767663]) - np.testing.assert_allclose(reg.predy[0], predy, RTOL) - n = 49 - np.testing.assert_allclose(reg.n, n, RTOL) - 
k = 4 - np.testing.assert_allclose(reg.k, k, RTOL) - y = np.array([80.467003]) - np.testing.assert_allclose(reg.y[0], y, RTOL) - x = np.array([1.0, 19.531, 15.72598]) - np.testing.assert_allclose(reg.x.toarray()[0], x, RTOL) - yend = np.array([35.4585005]) - np.testing.assert_allclose(reg.yend[0], yend, RTOL) - z = np.array([1.0, 19.531, 15.72598, 35.4585005]) - np.testing.assert_allclose(reg.z.toarray()[0], z, RTOL) - my = 38.43622446938776 - np.testing.assert_allclose(reg.mean_y, my, RTOL) - sy = 18.466069465206047 - np.testing.assert_allclose(reg.std_y, sy, RTOL) - vm = np.array([22.85691168, 0.48786563, 0.17914357, 0.46449287]) - np.testing.assert_allclose(np.sqrt(reg.vm.diagonal()), vm, RTOL) - sig2 = 181.78650186468832 - np.testing.assert_allclose(reg.sig2, sig2, RTOL) + np.testing.assert_allclose(reg.sig2,sig2,RTOL) + std_err = np.array([ 23.003401 , 0.73500657, 0.38075777]) + np.testing.assert_allclose(reg.std_err,std_err,RTOL) + z_stat = np.array([[ 2.40664208, 0.01609994], [ 0.63144305, 0.52775088], [-1.75659016, 0.07898769]]) + np.testing.assert_allclose(reg.z_stat,z_stat,RTOL) class TestGMCombo(unittest.TestCase): def setUp(self): - db = libpysal.io.open(libpysal.examples.get_path("columbus.dbf"), "r") + db=libpysal.io.open(libpysal.examples.get_path("columbus.dbf"),"r") y = np.array(db.by_col("HOVAL")) - self.y = np.reshape(y, (49, 1)) + self.y = np.reshape(y, (49,1)) X = [] X.append(db.by_col("INC")) X.append(db.by_col("CRIME")) self.X = np.array(X).T self.X = sparse.csr_matrix(self.X) - self.w = libpysal.weights.Rook.from_shapefile( - libpysal.examples.get_path("columbus.shp") - ) - self.w.transform = "r" - + self.w = libpysal.weights.Rook.from_shapefile(libpysal.examples.get_path("columbus.shp")) + self.w.transform = 'r' def test_model(self): # Only spatial lag reg = SP.GM_Combo(self.y, self.X, w=self.w) - e_reduced = np.array([28.18617481]) - np.testing.assert_allclose(reg.e_pred[0], e_reduced, RTOL) - predy_e = np.array([52.28082782]) - np.testing.assert_allclose(reg.predy_e[0], predy_e, RTOL) - betas = np.array( - [[57.61123515], [0.73441313], [-0.59459416], [-0.21762921], [0.54732051]] - ) - np.testing.assert_allclose(reg.betas, betas, RTOL) - u = np.array([25.57932637]) - np.testing.assert_allclose(reg.u[0], u, RTOL) - e_filtered = np.array([31.65374945]) - np.testing.assert_allclose(reg.e_filtered[0], e_filtered, RTOL) - predy = np.array([54.88767685]) - np.testing.assert_allclose(reg.predy[0], predy, RTOL) + e_reduced = np.array([ 28.18617481]) + np.testing.assert_allclose(reg.e_pred[0],e_reduced,RTOL) + predy_e = np.array([ 52.28082782]) + np.testing.assert_allclose(reg.predy_e[0],predy_e,RTOL) + betas = np.array([[ 57.61123515],[ 0.73441313], [ -0.59459416], [ -0.21762921], [ 0.54732051]]) + np.testing.assert_allclose(reg.betas,betas,RTOL) + u = np.array([ 25.57932637]) + np.testing.assert_allclose(reg.u[0],u,RTOL) + e_filtered = np.array([ 31.65374945]) + np.testing.assert_allclose(reg.e_filtered[0],e_filtered,RTOL) + predy = np.array([ 54.88767685]) + np.testing.assert_allclose(reg.predy[0],predy,RTOL) n = 49 - np.testing.assert_allclose(reg.n, n, RTOL) + np.testing.assert_allclose(reg.n,n,RTOL) k = 4 - np.testing.assert_allclose(reg.k, k, RTOL) - y = np.array([80.467003]) - np.testing.assert_allclose(reg.y[0], y, RTOL) - x = np.array([1.0, 19.531, 15.72598]) - np.testing.assert_allclose(reg.x.toarray()[0], x, RTOL) - yend = np.array([35.4585005]) - np.testing.assert_allclose(reg.yend[0], yend, RTOL) - z = np.array([1.0, 19.531, 15.72598, 35.4585005]) - 
np.testing.assert_allclose(reg.z.toarray()[0], z, RTOL) + np.testing.assert_allclose(reg.k,k,RTOL) + y = np.array([ 80.467003]) + np.testing.assert_allclose(reg.y[0],y,RTOL) + x = np.array([ 1. , 19.531 , 15.72598]) + np.testing.assert_allclose(reg.x.toarray()[0],x,RTOL) + yend = np.array([ 35.4585005]) + np.testing.assert_allclose(reg.yend[0],yend,RTOL) + z = np.array([ 1. , 19.531 , 15.72598 , 35.4585005]) + np.testing.assert_allclose(reg.z.toarray()[0],z,RTOL) my = 38.43622446938776 - np.testing.assert_allclose(reg.mean_y, my, RTOL) + np.testing.assert_allclose(reg.mean_y,my,RTOL) sy = 18.466069465206047 - np.testing.assert_allclose(reg.std_y, sy, RTOL) - vm = np.array([22.85691168, 0.48786563, 0.17914357, 0.46449287]) + np.testing.assert_allclose(reg.std_y,sy,RTOL) + vm = np.array([22.85691168, 0.48786563, 0.17914357, 0.46449287]) np.testing.assert_allclose(np.sqrt(reg.vm.diagonal()), vm, RTOL) sig2 = 181.78650186468832 - np.testing.assert_allclose(reg.sig2, sig2, RTOL) + np.testing.assert_allclose(reg.sig2,sig2,RTOL) pr2 = 0.3018280166937799 - np.testing.assert_allclose(reg.pr2, pr2, RTOL) + np.testing.assert_allclose(reg.pr2,pr2,RTOL) pr2_e = 0.3561355587000738 - np.testing.assert_allclose(reg.pr2_e, pr2_e, RTOL) - std_err = np.array([22.85692222, 0.48786559, 0.17914356, 0.46449318]) - np.testing.assert_allclose(reg.std_err, std_err, RTOL) - z_stat = np.array( - [ - [2.52051597e00, 1.17182922e-02], - [1.50535954e00, 1.32231664e-01], - [-3.31909311e00, 9.03103123e-04], - [-4.68530506e-01, 6.39405261e-01], - ] - ) - np.testing.assert_allclose(reg.z_stat, z_stat, RTOL) - + np.testing.assert_allclose(reg.pr2_e,pr2_e,RTOL) + std_err = np.array([ 22.85692222, 0.48786559, 0.17914356, 0.46449318]) + np.testing.assert_allclose(reg.std_err,std_err,RTOL) + z_stat = np.array([[ 2.52051597e+00, 1.17182922e-02], [ 1.50535954e+00, 1.32231664e-01], [ -3.31909311e+00, 9.03103123e-04], [ -4.68530506e-01, 6.39405261e-01]]) + np.testing.assert_allclose(reg.z_stat,z_stat,RTOL) -if __name__ == "__main__": - start_suppress = np.get_printoptions()["suppress"] - np.set_printoptions(suppress=True) +if __name__ == '__main__': + start_suppress = np.get_printoptions()['suppress'] + np.set_printoptions(suppress=True) unittest.main() np.set_printoptions(suppress=start_suppress) + diff --git a/spreg/tests/test_twosls_sp_regimes.py b/spreg/tests/test_twosls_sp_regimes.py index a264a973..f763ac74 100644 --- a/spreg/tests/test_twosls_sp_regimes.py +++ b/spreg/tests/test_twosls_sp_regimes.py @@ -6,527 +6,349 @@ from spreg.twosls_sp import GM_Lag from libpysal.common import RTOL - class TestGMLag_Regimes(unittest.TestCase): def setUp(self): - self.w = libpysal.weights.Queen.from_shapefile( - libpysal.examples.get_path("columbus.shp") - ) - self.w.transform = "r" - self.db = libpysal.io.open(libpysal.examples.get_path("columbus.dbf"), "r") + self.w = libpysal.weights.Queen.from_shapefile(libpysal.examples.get_path("columbus.shp")) + self.w.transform = 'r' + self.db = libpysal.io.open(libpysal.examples.get_path("columbus.dbf"), 'r') y = np.array(self.db.by_col("CRIME")) - self.y = np.reshape(y, (49, 1)) - self.r_var = "NSA" + self.y = np.reshape(y, (49,1)) + self.r_var = 'NSA' self.regimes = self.db.by_col(self.r_var) def test___init__(self): - # Matches SpaceStat + #Matches SpaceStat X = [] X.append(self.db.by_col("INC")) X.append(self.db.by_col("HOVAL")) self.X = np.array(X).T - reg = GM_Lag_Regimes( - self.y, - self.X, - self.regimes, - w=self.w, - sig2n_k=True, - regime_lag_sep=False, - regime_err_sep=False, - ) - 
betas = np.array( - [ - [45.14892906], - [-1.42593383], - [-0.11501037], - [40.99023016], - [-0.81498302], - [-0.28391409], - [0.4736163], - ] - ) - np.testing.assert_allclose(reg.betas, betas, RTOL) - e_5 = np.array( - [[-1.47960519], [-7.93748769], [-5.88561835], [-13.37941105], [5.2524303]] - ) - np.testing.assert_allclose(reg.e_pred[0:5], e_5, RTOL) - h_0 = np.array( - [[0.0, 0.0, 0.0, 1.0, 19.531, 80.467003, 0.0, 0.0, 18.594, 35.4585005]] - ) - np.testing.assert_allclose(reg.h[0] * np.eye(10), h_0) + reg = GM_Lag_Regimes(self.y, self.X, self.regimes, w=self.w, sig2n_k=True, regime_lag_sep=False, regime_err_sep=False) + betas = np.array([[ 45.14892906], + [ -1.42593383], + [ -0.11501037], + [ 40.99023016], + [ -0.81498302], + [ -0.28391409], + [ 0.4736163 ]]) + np.testing.assert_allclose(reg.betas, betas,RTOL) + e_5 = np.array([[ -1.47960519], + [ -7.93748769], + [ -5.88561835], + [-13.37941105], + [ 5.2524303 ]]) + np.testing.assert_allclose(reg.e_pred[0:5], e_5,RTOL) + h_0 = np.array([[ 0. , 0. , 0. , 1. , 19.531 , + 80.467003 , 0. , 0. , 18.594 , 35.4585005]]) + np.testing.assert_allclose(reg.h[0]*np.eye(10), h_0) self.assertEqual(reg.k, 7) self.assertEqual(reg.kstar, 1) - np.testing.assert_allclose(reg.mean_y, 35.128823897959187, RTOL) + np.testing.assert_allclose(reg.mean_y, 35.128823897959187,RTOL) self.assertEqual(reg.n, 49) - np.testing.assert_allclose(reg.pr2, 0.6572182131915739, RTOL) - np.testing.assert_allclose(reg.pr2_e, 0.5779687278635434, RTOL) - pfora1a2 = np.array( - [ - -2.15017629, - -0.30169328, - -0.07603704, - -22.06541809, - 0.45738058, - 0.02805828, - 0.39073923, - ] - ) - np.testing.assert_allclose(reg.pfora1a2[0], pfora1a2, RTOL) - predy_5 = np.array( - [[13.93216104], [23.46424269], [34.43510955], [44.32473878], [44.39117516]] - ) - np.testing.assert_allclose(reg.predy[0:5], predy_5, RTOL) - predy_e_5 = np.array( - [[17.20558519], [26.73924169], [36.51239935], [45.76717105], [45.4790797]] - ) - np.testing.assert_allclose(reg.predy_e[0:5], predy_e_5, RTOL) - q_5 = np.array([[0.0, 0.0, 18.594, 35.4585005]]) - np.testing.assert_allclose(reg.q[0] * np.eye(4), q_5, RTOL) - self.assertEqual(reg.robust, "unadjusted") - np.testing.assert_allclose(reg.sig2n_k, 109.76462904625834, RTOL) - np.testing.assert_allclose(reg.sig2n, 94.08396775393571, RTOL) - np.testing.assert_allclose(reg.sig2, 109.76462904625834, RTOL) - np.testing.assert_allclose(reg.std_y, 16.732092091229699, RTOL) - u_5 = np.array( - [[1.79381896], [-4.66248869], [-3.80832855], [-11.93697878], [6.34033484]] - ) - np.testing.assert_allclose(reg.u[0:5], u_5, RTOL) - np.testing.assert_allclose(reg.utu, 4610.11441994285, RTOL) - varb = np.array( - [ - 1.23841820e00, - -3.65620114e-02, - -1.21919663e-03, - 1.00057547e00, - -2.07403182e-02, - -1.27232693e-03, - -1.77184084e-02, - ] - ) - np.testing.assert_allclose(reg.varb[0], varb, RTOL) - vm = np.array( - [ - 1.35934514e02, - -4.01321561e00, - -1.33824666e-01, - 1.09827796e02, - -2.27655334e00, - -1.39656494e-01, - -1.94485452e00, - ] - ) - np.testing.assert_allclose(reg.vm[0], vm, RTOL) - x_0 = np.array([[0.0, 0.0, 0.0, 1.0, 19.531, 80.467003]]) - np.testing.assert_allclose(reg.x[0] * np.eye(6), x_0, RTOL) - y_5 = np.array([[15.72598], [18.801754], [30.626781], [32.38776], [50.73151]]) - np.testing.assert_allclose(reg.y[0:5], y_5, RTOL) - yend_5 = np.array( - [[24.7142675], [26.24684033], [29.411751], [34.64647575], [40.4653275]] - ) - np.testing.assert_allclose(reg.yend[0:5] * np.array([[1]]), yend_5, RTOL) - z_0 = np.array([[0.0, 0.0, 0.0, 1.0, 19.531, 
80.467003, 24.7142675]]) - np.testing.assert_allclose(reg.z[0] * np.eye(7), z_0, RTOL) - zthhthi = np.array( - [ - 1.00000000e00, - -2.35922393e-16, - 5.55111512e-17, - 0.00000000e00, - 0.00000000e00, - 0.00000000e00, - -4.44089210e-16, - 2.22044605e-16, - 0.00000000e00, - 0.00000000e00, - ] - ) + np.testing.assert_allclose(reg.pr2, 0.6572182131915739,RTOL) + np.testing.assert_allclose(reg.pr2_e, 0.5779687278635434,RTOL) + pfora1a2 = np.array([ -2.15017629, -0.30169328, -0.07603704, -22.06541809, + 0.45738058, 0.02805828, 0.39073923]) + np.testing.assert_allclose(reg.pfora1a2[0], pfora1a2,RTOL) + predy_5 = np.array([[ 13.93216104], + [ 23.46424269], + [ 34.43510955], + [ 44.32473878], + [ 44.39117516]]) + np.testing.assert_allclose(reg.predy[0:5], predy_5,RTOL) + predy_e_5 = np.array([[ 17.20558519], + [ 26.73924169], + [ 36.51239935], + [ 45.76717105], + [ 45.4790797 ]]) + np.testing.assert_allclose(reg.predy_e[0:5], predy_e_5,RTOL) + q_5 = np.array([[ 0. , 0. , 18.594 , 35.4585005]]) + np.testing.assert_allclose(reg.q[0]*np.eye(4), q_5, RTOL) + self.assertEqual(reg.robust, 'unadjusted') + np.testing.assert_allclose(reg.sig2n_k, 109.76462904625834,RTOL) + np.testing.assert_allclose(reg.sig2n, 94.08396775393571,RTOL) + np.testing.assert_allclose(reg.sig2, 109.76462904625834,RTOL) + np.testing.assert_allclose(reg.std_y, 16.732092091229699,RTOL) + u_5 = np.array([[ 1.79381896], + [ -4.66248869], + [ -3.80832855], + [-11.93697878], + [ 6.34033484]]) + np.testing.assert_allclose(reg.u[0:5], u_5,RTOL) + np.testing.assert_allclose(reg.utu, 4610.11441994285,RTOL) + varb = np.array([ 1.23841820e+00, -3.65620114e-02, -1.21919663e-03, + 1.00057547e+00, -2.07403182e-02, -1.27232693e-03, + -1.77184084e-02]) + np.testing.assert_allclose(reg.varb[0], varb,RTOL) + vm = np.array([ 1.35934514e+02, -4.01321561e+00, -1.33824666e-01, + 1.09827796e+02, -2.27655334e+00, -1.39656494e-01, + -1.94485452e+00]) + np.testing.assert_allclose(reg.vm[0], vm,RTOL) + x_0 = np.array([[ 0. , 0. , 0. , 1. , 19.531 , + 80.467003]]) + np.testing.assert_allclose(reg.x[0]*np.eye(6), x_0,RTOL) + y_5 = np.array([[ 15.72598 ], + [ 18.801754], + [ 30.626781], + [ 32.38776 ], + [ 50.73151 ]]) + np.testing.assert_allclose(reg.y[0:5], y_5,RTOL) + yend_5 = np.array([[ 24.7142675 ], + [ 26.24684033], + [ 29.411751 ], + [ 34.64647575], + [ 40.4653275 ]]) + np.testing.assert_allclose(reg.yend[0:5]*np.array([[1]]), yend_5,RTOL) + z_0 = np.array([[ 0. , 0. , 0. , 1. , 19.531 , + 80.467003 , 24.7142675]]) + np.testing.assert_allclose(reg.z[0]*np.eye(7), z_0,RTOL) + zthhthi = np.array([ 1.00000000e+00, -2.35922393e-16, 5.55111512e-17, + 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, + -4.44089210e-16, 2.22044605e-16, 0.00000000e+00, + 0.00000000e+00]) # np.testing.assert_allclose(reg.zthhthi[0], zthhthi, RTOL) np.testing.assert_array_almost_equal(reg.zthhthi[0], zthhthi) - chow_regi = np.array( - [[0.19692667, 0.65721307], [0.5666492, 0.45159351], [0.45282066, 0.5009985]] - ) - np.testing.assert_allclose(reg.chow.regi, chow_regi, RTOL) - np.testing.assert_allclose(reg.chow.joint[0], 0.82409867601863462, RTOL) - + chow_regi = np.array([[ 0.19692667, 0.65721307], + [ 0.5666492 , 0.45159351], + [ 0.45282066, 0.5009985 ]]) + np.testing.assert_allclose(reg.chow.regi, chow_regi,RTOL) + np.testing.assert_allclose(reg.chow.joint[0], 0.82409867601863462,RTOL) + def test_init_discbd(self): - # Matches SpaceStat. + #Matches SpaceStat. 
X = np.array(self.db.by_col("INC")) - X = np.reshape(X, (49, 1)) + X = np.reshape(X, (49,1)) yd = np.array(self.db.by_col("HOVAL")) - yd = np.reshape(yd, (49, 1)) + yd = np.reshape(yd, (49,1)) q = np.array(self.db.by_col("DISCBD")) - q = np.reshape(q, (49, 1)) - reg = GM_Lag_Regimes( - self.y, - X, - self.regimes, - yend=yd, - q=q, - lag_q=False, - w=self.w, - sig2n_k=True, - regime_lag_sep=False, - regime_err_sep=False, - ) - tbetas = np.array( - [ - [42.7266306], - [-0.15552345], - [37.70545276], - [-0.5341577], - [-0.68305796], - [-0.37106077], - [0.55809516], - ] - ) - np.testing.assert_allclose(tbetas, reg.betas, RTOL) - vm = np.array( - [ - 270.62979422, - 3.62539081, - 327.89638627, - 6.24949355, - -5.25333106, - -6.01743515, - -4.19290074, - ] - ) - np.testing.assert_allclose(reg.vm[0], vm, RTOL) - e_3 = np.array([[-0.33142796], [-9.51719607], [-7.86272153]]) - np.testing.assert_allclose(reg.e_pred[0:3], e_3, RTOL) - u_3 = np.array([[4.51839601], [-5.67363147], [-5.1927562]]) - np.testing.assert_allclose(reg.u[0:3], u_3, RTOL) - predy_3 = np.array([[11.20758399], [24.47538547], [35.8195372]]) - np.testing.assert_allclose(reg.predy[0:3], predy_3, RTOL) - predy_e_3 = np.array([[16.05740796], [28.31895007], [38.48950253]]) - np.testing.assert_allclose(reg.predy_e[0:3], predy_e_3, RTOL) - chow_regi = np.array( - [ - [0.13130991, 0.71707772], - [0.04740966, 0.82763357], - [0.15474413, 0.6940423], - ] - ) - np.testing.assert_allclose(reg.chow.regi, chow_regi, RTOL) - np.testing.assert_allclose(reg.chow.joint[0], 0.31248100032096549, RTOL) - + q = np.reshape(q, (49,1)) + reg = GM_Lag_Regimes(self.y, X, self.regimes, yend=yd, q=q, lag_q=False, w=self.w, sig2n_k=True, regime_lag_sep=False, regime_err_sep=False) + tbetas = np.array([[ 42.7266306 ], + [ -0.15552345], + [ 37.70545276], + [ -0.5341577 ], + [ -0.68305796], + [ -0.37106077], + [ 0.55809516]]) + np.testing.assert_allclose(tbetas, reg.betas,RTOL) + vm = np.array([ 270.62979422, 3.62539081, 327.89638627, 6.24949355, + -5.25333106, -6.01743515, -4.19290074]) + np.testing.assert_allclose(reg.vm[0], vm,RTOL) + e_3 = np.array([[-0.33142796], + [-9.51719607], + [-7.86272153]]) + np.testing.assert_allclose(reg.e_pred[0:3], e_3,RTOL) + u_3 = np.array([[ 4.51839601], + [-5.67363147], + [-5.1927562 ]]) + np.testing.assert_allclose(reg.u[0:3], u_3,RTOL) + predy_3 = np.array([[ 11.20758399], + [ 24.47538547], + [ 35.8195372 ]]) + np.testing.assert_allclose(reg.predy[0:3], predy_3,RTOL) + predy_e_3 = np.array([[ 16.05740796], + [ 28.31895007], + [ 38.48950253]]) + np.testing.assert_allclose(reg.predy_e[0:3], predy_e_3,RTOL) + chow_regi = np.array([[ 0.13130991, 0.71707772], + [ 0.04740966, 0.82763357], + [ 0.15474413, 0.6940423 ]]) + np.testing.assert_allclose(reg.chow.regi, chow_regi,RTOL) + np.testing.assert_allclose(reg.chow.joint[0], 0.31248100032096549,RTOL) + def test_lag_q(self): X = np.array(self.db.by_col("INC")) - X = np.reshape(X, (49, 1)) + X = np.reshape(X, (49,1)) yd = np.array(self.db.by_col("HOVAL")) - yd = np.reshape(yd, (49, 1)) + yd = np.reshape(yd, (49,1)) q = np.array(self.db.by_col("DISCBD")) - q = np.reshape(q, (49, 1)) - reg = GM_Lag_Regimes( - self.y, - X, - self.regimes, - yend=yd, - q=q, - w=self.w, - sig2n_k=True, - regime_lag_sep=False, - regime_err_sep=False, - ) - tbetas = np.array( - [ - [37.87698329], - [-0.89426982], - [31.4714777], - [-0.71640525], - [-0.28494432], - [-0.2294271], - [0.62996544], - ] - ) - np.testing.assert_allclose(tbetas, reg.betas, RTOL) - vm = np.array( - [ - 128.25714554, - -0.38975354, - 
95.7271044, - -1.8429218, - -1.75331978, - -0.18240338, - -1.67767464, - ] - ) - np.testing.assert_allclose(reg.vm[0], vm, RTOL) - chow_regi = np.array( - [ - [0.43494049, 0.50957463], - [0.02089281, 0.88507135], - [0.01180501, 0.91347943], - ] - ) - np.testing.assert_allclose(reg.chow.regi, chow_regi, RTOL) - np.testing.assert_allclose(reg.chow.joint[0], 0.54288190938307757, RTOL) - + q = np.reshape(q, (49,1)) + reg = GM_Lag_Regimes(self.y, X, self.regimes, yend=yd, q=q, w=self.w, sig2n_k=True, regime_lag_sep=False, regime_err_sep=False) + tbetas = np.array([[ 37.87698329], + [ -0.89426982], + [ 31.4714777 ], + [ -0.71640525], + [ -0.28494432], + [ -0.2294271 ], + [ 0.62996544]]) + np.testing.assert_allclose(tbetas, reg.betas,RTOL) + vm = np.array([ 128.25714554, -0.38975354, 95.7271044 , -1.8429218 , + -1.75331978, -0.18240338, -1.67767464]) + np.testing.assert_allclose(reg.vm[0], vm,RTOL) + chow_regi = np.array([[ 0.43494049, 0.50957463], + [ 0.02089281, 0.88507135], + [ 0.01180501, 0.91347943]]) + np.testing.assert_allclose(reg.chow.regi, chow_regi,RTOL) + np.testing.assert_allclose(reg.chow.joint[0], 0.54288190938307757,RTOL) + def test_all_regi(self): X = np.array(self.db.by_col("INC")) - X = np.reshape(X, (49, 1)) + X = np.reshape(X, (49,1)) yd = np.array(self.db.by_col("HOVAL")) - yd = np.reshape(yd, (49, 1)) + yd = np.reshape(yd, (49,1)) q = np.array(self.db.by_col("DISCBD")) - q = np.reshape(q, (49, 1)) - reg = GM_Lag_Regimes( - self.y, - X, - self.regimes, - yend=yd, - q=q, - w=self.w, - regime_lag_sep=False, - regime_err_sep=True, - ) - tbetas = np.array( - [ - [ - 37.87698329, - -0.89426982, - 31.4714777, - -0.71640525, - -0.28494432, - -0.2294271, - 0.62996544, - ] - ] - ) - np.testing.assert_allclose(tbetas, reg.betas.T, RTOL) - vm = np.array( - [ - 70.38291551, - -0.64868787, - 49.25453215, - -0.62851534, - -0.75413453, - -0.12674433, - -0.97179236, - ] - ) - np.testing.assert_allclose(reg.vm[0], vm, RTOL) - e_3 = np.array([[-2.66997799], [-7.69786264], [-4.39412782]]) - np.testing.assert_allclose(reg.e_pred[0:3], e_3, RTOL) - u_3 = np.array([[1.13879007], [-3.76873198], [-1.89671717]]) - np.testing.assert_allclose(reg.u[0:3], u_3, RTOL) - predy_3 = np.array([[14.58718993], [22.57048598], [32.52349817]]) - np.testing.assert_allclose(reg.predy[0:3], predy_3, RTOL) - predy_e_3 = np.array([[18.39595799], [26.49961664], [35.02090882]]) - np.testing.assert_allclose(reg.predy_e[0:3], predy_e_3, RTOL) - chow_regi = np.array( - [ - [0.60091096, 0.43823066], - [0.03006744, 0.8623373], - [0.01943727, 0.88912016], - ] - ) - np.testing.assert_allclose(reg.chow.regi, chow_regi, RTOL) - np.testing.assert_allclose(reg.chow.joint[0], 0.88634854058300516, RTOL) - + q = np.reshape(q, (49,1)) + reg = GM_Lag_Regimes(self.y, X, self.regimes, yend=yd, q=q, w=self.w, regime_lag_sep=False, regime_err_sep=False) + tbetas = np.array([[ 37.87698329, -0.89426982, 31.4714777 , -0.71640525, + -0.28494432, -0.2294271 , 0.62996544]]) + np.testing.assert_allclose(tbetas, reg.betas.T,RTOL) + vm = np.array([109.934696, -0.334074, 82.051804, -1.579647, -1.502846, + -0.156346, -1.438007]) + np.testing.assert_allclose(reg.vm[0], vm,RTOL) + e_3 = np.array([[-2.66997799], + [-7.69786264], + [-4.39412782]]) + np.testing.assert_allclose(reg.e_pred[0:3], e_3,RTOL) + u_3 = np.array([[ 1.13879007], + [-3.76873198], + [-1.89671717]]) + np.testing.assert_allclose(reg.u[0:3], u_3,RTOL) + predy_3 = np.array([[ 14.58718993], + [ 22.57048598], + [ 32.52349817]]) + np.testing.assert_allclose(reg.predy[0:3], predy_3,RTOL) + 
predy_e_3 = np.array([[ 18.39595799], + [ 26.49961664], + [ 35.02090882]]) + np.testing.assert_allclose(reg.predy_e[0:3], predy_e_3,RTOL) + chow_regi = np.array([[0.507431, 0.476253], + [0.024375, 0.875935], + [0.013773, 0.906578]]) + np.testing.assert_allclose(reg.chow.regi, chow_regi,1e-04) + np.testing.assert_allclose(reg.chow.joint[0], 0.633362,RTOL) + def test_all_regi_sig2(self): - # Artficial: + #Artficial: n = 256 - x1 = np.random.uniform(-10, 10, (n, 1)) - x2 = np.random.uniform(1, 5, (n, 1)) - q = x2 + np.random.normal(0, 1, (n, 1)) - x = np.hstack((x1, x2)) - y = np.dot( - np.hstack((np.ones((n, 1)), x)), np.array([[1], [0.5], [2]]) - ) + np.random.normal(0, 1, (n, 1)) + x1 = np.random.uniform(-10,10,(n,1)) + x2 = np.random.uniform(1,5,(n,1)) + q = x2 + np.random.normal(0,1,(n,1)) + x = np.hstack((x1,x2)) + y = np.dot(np.hstack((np.ones((n,1)),x)),np.array([[1],[0.5],[2]])) + np.random.normal(0,1,(n,1)) latt = int(np.sqrt(n)) - w = libpysal.weights.util.lat2W(latt, latt) - w.transform = "r" - regi = [0] * (n // 2) + [1] * (n // 2) - model = GM_Lag_Regimes( - y, x1, regi, q=q, yend=x2, w=w, regime_lag_sep=True, regime_err_sep=True - ) - w1 = libpysal.weights.util.lat2W(latt // 2, latt) - w1.transform = "r" - model1 = GM_Lag( - y[0 : (n // 2)].reshape((n // 2), 1), - x1[0 : (n // 2)], - yend=x2[0 : (n // 2)], - q=q[0 : (n // 2)], - w=w1, - ) - model2 = GM_Lag( - y[(n // 2) : n].reshape((n // 2), 1), - x1[(n // 2) : n], - yend=x2[(n // 2) : n], - q=q[(n // 2) : n], - w=w1, - ) + w = libpysal.weights.util.lat2W(latt,latt) + w.transform='r' + regi = [0]*(n//2) + [1]*(n//2) + model = GM_Lag_Regimes(y, x1, regi, q=q, yend=x2, w=w, regime_lag_sep=True, regime_err_sep=True) + w1 = libpysal.weights.util.lat2W(latt//2,latt) + w1.transform='r' + model1 = GM_Lag(y[0:(n//2)].reshape((n//2),1), x1[0:(n//2)],yend=x2[0:(n//2)], q=q[0:(n//2)], w=w1) + model2 = GM_Lag(y[(n//2):n].reshape((n//2),1), x1[(n//2):n],yend=x2[(n//2):n], q=q[(n//2):n], w=w1) tbetas = np.vstack((model1.betas, model2.betas)) - np.testing.assert_allclose(model.betas, tbetas) - vm = np.hstack((model1.vm.diagonal(), model2.vm.diagonal())) - np.testing.assert_allclose(model.vm.diagonal(), vm, RTOL) - # Columbus: + np.testing.assert_allclose(model.betas,tbetas) + vm = np.hstack((model1.vm.diagonal(),model2.vm.diagonal())) + np.testing.assert_allclose(model.vm.diagonal(), vm,RTOL) + #Columbus: X = np.array(self.db.by_col("INC")) - X = np.reshape(X, (49, 1)) + X = np.reshape(X, (49,1)) yd = np.array(self.db.by_col("HOVAL")) - yd = np.reshape(yd, (49, 1)) + yd = np.reshape(yd, (49,1)) q = np.array(self.db.by_col("DISCBD")) - q = np.reshape(q, (49, 1)) - reg = GM_Lag_Regimes( - self.y, - X, - self.regimes, - yend=yd, - q=q, - w=self.w, - regime_lag_sep=True, - regime_err_sep=True, - ) - tbetas = np.array( - [ - [42.35827477], - [-0.09472413], - [-0.68794223], - [0.54482537], - [32.24228762], - [-0.12304063], - [-0.46840307], - [0.67108156], - ] - ) + q = np.reshape(q, (49,1)) + reg = GM_Lag_Regimes(self.y, X, self.regimes, yend=yd, q=q, w=self.w,regime_lag_sep=True, regime_err_sep = True) + tbetas = np.array([[ 42.35827477], + [ -0.09472413], + [ -0.68794223], + [ 0.54482537], + [ 32.24228762], + [ -0.12304063], + [ -0.46840307], + [ 0.67108156]]) np.testing.assert_allclose(tbetas, reg.betas) - vm = np.array( - [200.92894859, 4.56244927, -4.85603079, -2.9755413, 0.0, 0.0, 0.0, 0.0] - ) - np.testing.assert_allclose(reg.vm[0], vm, RTOL) - e_3 = np.array([[-1.32209547], [-13.15611199], [-11.62357696]]) - 
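# The artificial-data block at the top of this test checks that, with both
# regime_lag_sep=True and regime_err_sep=True, GM_Lag_Regimes on the stacked
# sample reproduces two independent GM_Lag fits, one per regime with the weights
# restricted to its half of the lattice: the stacked betas and the vm diagonals
# are asserted to coincide before the columbus checks below.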
np.testing.assert_allclose(reg.e_pred[0:3], e_3, RTOL) - u_3 = np.array([[6.99250069], [-7.5665856], [-7.04753328]]) - np.testing.assert_allclose(reg.u[0:3], u_3, RTOL) - predy_3 = np.array([[8.73347931], [26.3683396], [37.67431428]]) - np.testing.assert_allclose(reg.predy[0:3], predy_3, RTOL) - predy_e_3 = np.array([[17.04807547], [31.95786599], [42.25035796]]) - np.testing.assert_allclose(reg.predy_e[0:3], predy_e_3, RTOL) - chow_regi = np.array( - [ - [1.51825373e-01, 6.96797034e-01], - [3.20105698e-04, 9.85725412e-01], - [8.58836996e-02, 7.69476896e-01], - [1.01357290e-01, 7.50206873e-01], - ] - ) - np.testing.assert_allclose(reg.chow.regi, chow_regi, RTOL) - np.testing.assert_allclose(reg.chow.joint[0], 0.38417230022512161, RTOL) + vm = np.array([ 200.92894859, 4.56244927, -4.85603079, -2.9755413 , + 0. , 0. , 0. , 0. ]) + np.testing.assert_allclose(reg.vm[0], vm,RTOL) + e_3 = np.array([[ -1.32209547], + [-13.15611199], + [-11.62357696]]) + np.testing.assert_allclose(reg.e_pred[0:3], e_3,RTOL) + u_3 = np.array([[ 6.99250069], + [-7.5665856 ], + [-7.04753328]]) + np.testing.assert_allclose(reg.u[0:3], u_3,RTOL) + predy_3 = np.array([[ 8.73347931], + [ 26.3683396 ], + [ 37.67431428]]) + np.testing.assert_allclose(reg.predy[0:3], predy_3,RTOL) + predy_e_3 = np.array([[ 17.04807547], + [ 31.95786599], + [ 42.25035796]]) + np.testing.assert_allclose(reg.predy_e[0:3], predy_e_3,RTOL) + chow_regi = np.array([[ 1.51825373e-01, 6.96797034e-01], + [ 3.20105698e-04, 9.85725412e-01], + [ 8.58836996e-02, 7.69476896e-01], + [ 1.01357290e-01, 7.50206873e-01]]) + np.testing.assert_allclose(reg.chow.regi, chow_regi,RTOL) + np.testing.assert_allclose(reg.chow.joint[0], 0.38417230022512161,RTOL) def test_fixed_const(self): X = np.array(self.db.by_col("INC")) - X = np.reshape(X, (49, 1)) + X = np.reshape(X, (49,1)) yd = np.array(self.db.by_col("HOVAL")) - yd = np.reshape(yd, (49, 1)) + yd = np.reshape(yd, (49,1)) q = np.array(self.db.by_col("DISCBD")) - q = np.reshape(q, (49, 1)) - reg = GM_Lag_Regimes( - self.y, - X, - self.regimes, - yend=yd, - q=q, - w=self.w, - constant_regi="one", - regime_lag_sep=False, - regime_err_sep=False, - ) - tbetas = np.array( - [ - [-0.37658823], - [-0.9666079], - [35.5445944], - [-0.45793559], - [-0.24216904], - [0.62500602], - ] - ) - np.testing.assert_allclose(tbetas, reg.betas, RTOL) - vm = np.array( - [1.4183697, -0.05975784, -0.27161863, -0.62517245, 0.02266177, 0.00312976] - ) - np.testing.assert_allclose(reg.vm[0], vm, RTOL) - e_3 = np.array([[0.17317815], [-5.53766328], [-3.82889307]]) - np.testing.assert_allclose(reg.e_pred[0:3], e_3, RTOL) - u_3 = np.array([[3.10025518], [-1.83150689], [-1.49598494]]) - np.testing.assert_allclose(reg.u[0:3], u_3, RTOL) - predy_3 = np.array([[12.62572482], [20.63326089], [32.12276594]]) - np.testing.assert_allclose(reg.predy[0:3], predy_3, RTOL) - predy_e_3 = np.array([[15.55280185], [24.33941728], [34.45567407]]) - np.testing.assert_allclose(reg.predy_e[0:3], predy_e_3, RTOL) - chow_regi = np.array( - [[1.85767047e-01, 6.66463269e-01], [1.19445012e01, 5.48089036e-04]] - ) - np.testing.assert_allclose(reg.chow.regi, chow_regi, RTOL) - np.testing.assert_allclose(reg.chow.joint[0], 12.017256217621382, RTOL) + q = np.reshape(q, (49,1)) + reg = GM_Lag_Regimes(self.y, X, self.regimes, yend=yd, q=q, w=self.w, constant_regi='one', regime_lag_sep=False, regime_err_sep=False) + tbetas = np.array([[ -0.37658823], + [ -0.9666079 ], + [ 35.5445944 ], + [ -0.45793559], + [ -0.24216904], + [ 0.62500602]]) + np.testing.assert_allclose(tbetas, 
reg.betas,RTOL) + vm = np.array([ 1.4183697 , -0.05975784, -0.27161863, -0.62517245, 0.02266177, + 0.00312976]) + np.testing.assert_allclose(reg.vm[0], vm,RTOL) + e_3 = np.array([[ 0.17317815], + [-5.53766328], + [-3.82889307]]) + np.testing.assert_allclose(reg.e_pred[0:3], e_3,RTOL) + u_3 = np.array([[ 3.10025518], + [-1.83150689], + [-1.49598494]]) + np.testing.assert_allclose(reg.u[0:3], u_3,RTOL) + predy_3 = np.array([[ 12.62572482], + [ 20.63326089], + [ 32.12276594]]) + np.testing.assert_allclose(reg.predy[0:3], predy_3,RTOL) + predy_e_3 = np.array([[ 15.55280185], + [ 24.33941728], + [ 34.45567407]]) + np.testing.assert_allclose(reg.predy_e[0:3], predy_e_3,RTOL) + chow_regi = np.array([[ 1.85767047e-01, 6.66463269e-01], + [ 1.19445012e+01, 5.48089036e-04]]) + np.testing.assert_allclose(reg.chow.regi, chow_regi,RTOL) + np.testing.assert_allclose(reg.chow.joint[0], 12.017256217621382,RTOL) def test_names(self): - y_var = "CRIME" - x_var = ["INC"] + y_var = 'CRIME' + x_var = ['INC'] x = np.array([self.db.by_col(name) for name in x_var]).T - yd_var = ["HOVAL"] + yd_var = ['HOVAL'] yd = np.array([self.db.by_col(name) for name in yd_var]).T - q_var = ["DISCBD"] + q_var = ['DISCBD'] q = np.array([self.db.by_col(name) for name in q_var]).T - r_var = "NSA" - reg = GM_Lag_Regimes( - self.y, - x, - self.regimes, - yend=yd, - q=q, - w=self.w, - name_y=y_var, - name_x=x_var, - name_yend=yd_var, - name_q=q_var, - name_regimes=r_var, - name_ds="columbus", - name_w="columbus.gal", - regime_lag_sep=False, - regime_err_sep=False, - ) - betas = np.array( - [ - [37.87698329], - [-0.89426982], - [31.4714777], - [-0.71640525], - [-0.28494432], - [-0.2294271], - [0.62996544], - ] - ) - np.testing.assert_allclose(reg.betas, betas, RTOL) - vm = np.array( - [ - 109.93469618, - -0.33407447, - 82.05180377, - -1.57964725, - -1.50284553, - -0.15634575, - -1.43800683, - ] - ) - np.testing.assert_allclose(reg.vm[0], vm, RTOL) - chow_regi = np.array( - [ - [0.50743058, 0.47625326], - [0.02437494, 0.87593468], - [0.01377251, 0.9065777], - ] - ) - np.testing.assert_allclose(reg.chow.regi, chow_regi, RTOL) - np.testing.assert_allclose(reg.chow.joint[0], 0.63336222761359162, RTOL) - self.assertListEqual(reg.name_x, ["0_CONSTANT", "0_INC", "1_CONSTANT", "1_INC"]) - self.assertListEqual(reg.name_yend, ["0_HOVAL", "1_HOVAL", "_Global_W_CRIME"]) - self.assertListEqual( - reg.name_q, - ["0_DISCBD", "0_W_INC", "0_W_DISCBD", "1_DISCBD", "1_W_INC", "1_W_DISCBD"], - ) + r_var = 'NSA' + reg = GM_Lag_Regimes(self.y, x, self.regimes, yend=yd, q=q, w=self.w, name_y=y_var, name_x=x_var, name_yend=yd_var, name_q=q_var, name_regimes=r_var, name_ds='columbus', name_w='columbus.gal', regime_lag_sep=False, regime_err_sep=False) + betas = np.array([[ 37.87698329], + [ -0.89426982], + [ 31.4714777 ], + [ -0.71640525], + [ -0.28494432], + [ -0.2294271 ], + [ 0.62996544]]) + np.testing.assert_allclose(reg.betas, betas,RTOL) + vm = np.array([ 109.93469618, -0.33407447, 82.05180377, -1.57964725, + -1.50284553, -0.15634575, -1.43800683]) + np.testing.assert_allclose(reg.vm[0], vm,RTOL) + chow_regi = np.array([[ 0.50743058, 0.47625326], + [ 0.02437494, 0.87593468], + [ 0.01377251, 0.9065777 ]]) + np.testing.assert_allclose(reg.chow.regi, chow_regi,RTOL) + np.testing.assert_allclose(reg.chow.joint[0], 0.63336222761359162,RTOL) + self.assertListEqual(reg.name_x, ['0_CONSTANT', '0_INC', '1_CONSTANT', '1_INC']) + self.assertListEqual(reg.name_yend, ['0_HOVAL', '1_HOVAL', '_Global_W_CRIME']) + self.assertListEqual(reg.name_q, ['0_DISCBD', '0_W_INC', 
'0_W_DISCBD', '1_DISCBD', '1_W_INC', '1_W_DISCBD']) self.assertEqual(reg.name_y, y_var) - -if __name__ == "__main__": +if __name__ == '__main__': unittest.main() From 1af9b1ff24828adf85a9bd91726911680b5bac78 Mon Sep 17 00:00:00 2001 From: Pedro Amaral Date: Wed, 13 Sep 2023 17:34:39 -0300 Subject: [PATCH 3/6] Attempt at fixing the SUR-related failures due to deprecation --- spreg/diagnostics_sur.py | 2 +- spreg/sur_utils.py | 21 ++++++++++++++------- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/spreg/diagnostics_sur.py b/spreg/diagnostics_sur.py index 8e145b54..7fdfdb1d 100644 --- a/spreg/diagnostics_sur.py +++ b/spreg/diagnostics_sur.py @@ -221,7 +221,7 @@ def surLMlag(n_eq, WS, bigy, bigX, bigE, bigYP, sig, varb): """ # Score - Y = np.hstack((bigy[r]) for r in range(n_eq)) + Y = np.hstack([bigy[r] for r in range(n_eq)]) WY = WS * Y EWY = np.dot(bigE.T, WY) sigi = la.inv(sig) diff --git a/spreg/sur_utils.py b/spreg/sur_utils.py index 50dc7341..bb78d3c8 100644 --- a/spreg/sur_utils.py +++ b/spreg/sur_utils.py @@ -290,7 +290,9 @@ def sur_dict2mat(dicts): """ n_dicts = len(dicts.keys()) - mat = np.vstack((dicts[t] for t in range(n_dicts))) + #mat = np.vstack((dicts[t] for t in range(n_dicts))) + mat = np.vstack([dicts[t] for t in range(n_dicts)]) + return mat @@ -392,10 +394,11 @@ def sur_est(bigXX, bigXy, bigE, bigK): for t in range(n_eq): sxy = sxy + sigi[r, t] * bigXy[(r, t)] sigiXy[r] = sxy - xsigy = np.vstack((sigiXy[t] for t in range(n_eq))) - xsigx = np.vstack( - ((np.hstack(sigiXX[(r, t)] for t in range(n_eq))) for r in range(n_eq)) - ) + #xsigy = np.vstack((sigiXy[t] for t in range(n_eq))) + xsigy = np.vstack(tuple(sigiXy[t] for t in range(n_eq))) + #xsigx = np.vstack(((np.hstack(sigiXX[(r, t)] for t in range(n_eq))) for r in range(n_eq))) + array_lists = [[sigiXX[(r, t)] for t in range(n_eq)] for r in range(n_eq)] + xsigx = np.vstack([np.hstack(arr_list) for arr_list in array_lists]) varb = la.inv(xsigx) beta = np.dot(varb, xsigy) bSUR = sur_mat2dict(beta, bigK) @@ -423,7 +426,9 @@ def sur_resids(bigy, bigX, beta): """ n_eq = len(bigy.keys()) - bigE = np.hstack((bigy[r] - spdot(bigX[r], beta[r])) for r in range(n_eq)) + #bigE = np.hstack((bigy[r] - spdot(bigX[r], beta[r])) for r in range(n_eq)) + bigE = np.hstack(tuple(bigy[r] - spdot(bigX[r], beta[r]) for r in range(n_eq))) + return bigE @@ -449,7 +454,9 @@ def sur_predict(bigy, bigX, beta): """ n_eq = len(bigy.keys()) - bigYP = np.hstack(spdot(bigX[r], beta[r]) for r in range(n_eq)) + #bigYP = np.hstack(spdot(bigX[r], beta[r]) for r in range(n_eq)) + bigYP = np.hstack([spdot(bigX[r], beta[r]) for r in range(n_eq)]) + return bigYP From 503316c4855a4ace846e2e8ec86601ffcb769d46 Mon Sep 17 00:00:00 2001 From: Pedro Amaral Date: Wed, 13 Sep 2023 17:42:40 -0300 Subject: [PATCH 4/6] Attempt at fixing last failed SUR file --- spreg/sur_error.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spreg/sur_error.py b/spreg/sur_error.py index b88e1220..37143f5d 100644 --- a/spreg/sur_error.py +++ b/spreg/sur_error.py @@ -553,7 +553,7 @@ def __init__(self, bigy, bigX, w, epsilon=0.0000001): bigE = sur_resids(self.bigy, self.bigX, b1) res = minimize( clik, - lam, + np.array(lam).flatten(), args=(self.n, self.n2, self.n_eq, bigE, I, WS), method="L-BFGS-B", bounds=lambdabounds, @@ -1119,7 +1119,7 @@ def _test(): bigy0, bigX0, w, - regimes=regimes, + #regimes=regimes, name_bigy=bigyvars0, name_bigX=bigXvars0, name_w="natqueen", From e949afa260fec1558349c5d839c9b6ccc7f95cb4 Mon Sep 17 00:00:00 2001 From: Pedro 
Amaral Date: Wed, 13 Sep 2023 18:01:14 -0300 Subject: [PATCH 5/6] Following Eli's fix! Thanks, Eli! =) --- .github/workflows/unittests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/unittests.yml b/.github/workflows/unittests.yml index 5e023225..e8db45b7 100644 --- a/.github/workflows/unittests.yml +++ b/.github/workflows/unittests.yml @@ -47,7 +47,7 @@ - name: install bleeding edge libpysal (only Ubuntu / Python 3.9) shell: bash -l {0} - run: pip install git+https://github.com/pysal/libpysal.git@master + run: pip install git+https://github.com/pysal/libpysal.git@main if: matrix.os == 'ubuntu-latest' && contains(matrix.environment-file, 'DEV') - name: run tests - bash From c74cf34697340bebf1a39319dad6b64eec272f22 Mon Sep 17 00:00:00 2001 From: Pedro Amaral Date: Thu, 14 Sep 2023 14:14:59 -0300 Subject: [PATCH 6/6] Adding message about the lack of implementation of latex format for the output --- spreg/output.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/spreg/output.py b/spreg/output.py index c871c842..25f94e66 100644 --- a/spreg/output.py +++ b/spreg/output.py @@ -16,6 +16,8 @@ ############################################################################### def output(reg, vm, other_end=False, robust=False, latex=False): + if latex: + print("Warning: Latex output not implemented yet. Using standard output instead.") strSummary = output_start(reg) for eq in reg.output['equation'].unique(): try:
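
For reference, a minimal sketch (not part of the patches above, and assuming a recent NumPy release) of the behaviour that patch 3/6 works around in diagnostics_sur.py and sur_utils.py: np.hstack and np.vstack no longer accept bare generator expressions, so the SUR helpers now build a list (or tuple) of arrays before stacking. The dictionary name `bigy` below mirrors the naming in sur_utils.py but the data are made up for illustration.

    # Sketch of the NumPy change behind the SUR fixes; not part of the patch.
    import numpy as np

    bigy = {0: np.ones((3, 1)), 1: 2 * np.ones((3, 1))}
    n_eq = len(bigy)

    # Old pattern, deprecated and eventually rejected by recent NumPy:
    #   Y = np.hstack((bigy[r]) for r in range(n_eq))

    # Fixed pattern, as used in the patch: materialize the arrays in a list first.
    Y = np.hstack([bigy[r] for r in range(n_eq)])
    print(Y.shape)  # (3, 2)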