Modelos de Regresión Espacial SAR
Autocorrelación endógena con la variable dependiente
import geopandas as gpd
import pandas as pd
import numpy as np
import libpysal
from libpysal.weights import Queen
from sklearn.preprocessing import StandardScaler
import spreg
---------------------------------------------------------------------------
ModuleNotFoundError Traceback (most recent call last)
Cell In[1], line 4
2 import pandas as pd
3 import numpy as np
----> 4 import libpysal
5 from libpysal.weights import Queen
6 from sklearn.preprocessing import StandardScaler
ModuleNotFoundError: No module named 'libpysal'
# Read the municipality-level COVID-19 GeoPackage from the remote repository
# into a GeoDataFrame, then print its column/dtype summary.
COVID_GPKG_URL = "https://github.com/algarciach/AnalisisGeoespacial/raw/main/Covid19_model/Data/covid19_municipios_antioquia.gpkg"
gdf = gpd.read_file(COVID_GPKG_URL)
gdf.info()
<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 125 entries, 0 to 124
Data columns (total 15 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 codigo_municipio 125 non-null object
1 nombre_municipio 125 non-null object
2 codigo_subregion 125 non-null object
3 nombre_subregion 125 non-null object
4 area_municipio 125 non-null float64
5 altitud 125 non-null float64
6 temperatura 125 non-null float64
7 humedad_relativa 125 non-null float64
8 poblacion 125 non-null int64
9 urbanizacion 125 non-null float64
10 densidad 125 non-null float64
11 muertes_covid19 125 non-null int64
12 recuperados_covid19 125 non-null int64
13 cfr 125 non-null float64
14 geometry 125 non-null geometry
dtypes: float64(7), geometry(1), int64(3), object(4)
memory usage: 14.8+ KB
# --- Dependent variable -----------------------------------------------------
# Case-fatality rate divided by municipal area, then log-transformed.
gdf['density_cfr'] = gdf['cfr'] / gdf['area_municipio']
# log1p(x) is log(1 + x) but keeps full precision when x is close to zero.
gdf['y'] = np.log1p(gdf['density_cfr'])
y = gdf['y'].values.reshape(-1, 1)  # column vector, as spreg expects

# --- Explanatory variables --------------------------------------------------
# Standardize every covariate (zero mean, unit variance) so coefficients are
# expressed per standard deviation and can be compared with each other.
independent_vars = ["altitud", "temperatura", "humedad_relativa", "urbanizacion", "densidad"]
X = gdf[independent_vars].values
st = StandardScaler()
X_scaled = st.fit_transform(X)
X_scaled_df = pd.DataFrame(X_scaled, columns=independent_vars)

# --- Spatial weights --------------------------------------------------------
# Queen contiguity built from the municipality polygons. `use_index=False` is
# the current default; stating it explicitly keeps today's behaviour and
# silences the FutureWarning about the default changing.
# NOTE(review): no `w.transform` is set, so presumably the weights stay binary
# and each spatial lag below is a neighbour *sum*; set `w.transform = 'r'` if
# neighbour averages (row-standardized weights) are intended — confirm.
w = Queen.from_dataframe(gdf, use_index=False)

# --- SLX design matrix ------------------------------------------------------
# One spatially lagged column (prefixed `w_`) per standardized covariate.
wx_dict = {
    f'w_{var}': libpysal.weights.lag_spatial(w, X_scaled_df[var])
    for var in independent_vars
}
wx_df = pd.DataFrame(wx_dict)
slx_exog = pd.concat([X_scaled_df, wx_df], axis=1)

# --- Estimation -------------------------------------------------------------
# OLS on [X, WX]. `w` is passed so that `name_w` actually appears in the
# report header; without it spreg prints "Weights matrix : None".
ols_model = spreg.OLS(
    y,
    slx_exog.values,
    w=w,
    name_y='log_density_cfr',
    name_x=slx_exog.columns.tolist(),
    name_w='queen_contiguity',
)
print(ols_model.summary)
/var/folders/nq/kj34bm5140bgwbktdx1z94cm0000gn/T/ipykernel_1010/1856113932.py:11: FutureWarning: `use_index` defaults to False but will default to True in future. Set True/False directly to control this behavior and silence this warning
w = Queen.from_dataframe(gdf)
REGRESSION RESULTS
------------------
SUMMARY OF OUTPUT: ORDINARY LEAST SQUARES
-----------------------------------------
Data set : unknown
Weights matrix : None
Dependent Variable :log_density_cfr Number of Observations: 125
Mean dependent var : 0.0146 Number of Variables : 11
S.D. dependent var : 0.0161 Degrees of Freedom : 114
R-squared : 0.2723
Adjusted R-squared : 0.2085
Sum squared residual: 0.023518 F-statistic : 4.2656
Sigma-square : 0.000 Prob(F-statistic) : 4.707e-05
S.E. of regression : 0.014 Log likelihood : 358.777
Sigma-square ML : 0.000 Akaike info criterion : -695.553
S.E of regression ML: 0.0137 Schwarz criterion : -664.442
------------------------------------------------------------------------------------
Variable Coefficient Std.Error t-Statistic Probability
------------------------------------------------------------------------------------
CONSTANT 0.01504 0.00140 10.75680 0.00000
altitud 0.03957 0.01723 2.29662 0.02347
temperatura 0.03626 0.01619 2.24025 0.02701
humedad_relativa -0.00084 0.00286 -0.29564 0.76804
urbanizacion 0.00136 0.00151 0.90349 0.36817
densidad 0.00411 0.00174 2.35734 0.02011
w_altitud -0.00818 0.00484 -1.68961 0.09384
w_temperatura -0.00805 0.00472 -1.70509 0.09090
w_humedad_relativa -0.00035 0.00072 -0.48710 0.62712
w_urbanizacion -0.00032 0.00056 -0.56459 0.57346
w_densidad 0.00094 0.00069 1.36916 0.17364
------------------------------------------------------------------------------------
REGRESSION DIAGNOSTICS
MULTICOLLINEARITY CONDITION NUMBER 53.574
TEST ON NORMALITY OF ERRORS
TEST DF VALUE PROB
Jarque-Bera 2 832.594 0.0000
DIAGNOSTICS FOR HETEROSKEDASTICITY
RANDOM COEFFICIENTS
TEST DF VALUE PROB
Breusch-Pagan test 10 47.994 0.0000
Koenker-Bassett test 10 7.130 0.7131
================================ END OF REPORT =====================================
# Spatial error model estimated by GMM with heteroskedasticity-robust
# inference, using the standardized covariates and the queen weights.
error_het_labels = {
    'name_y': 'log_density_cfr_rec',
    'name_x': independent_vars,
    'name_w': 'queen_contiguity',
}
spatial_error_model = spreg.GM_Error_Het(y, X_scaled, w=w, **error_het_labels)
print(spatial_error_model.summary)
REGRESSION RESULTS
------------------
SUMMARY OF OUTPUT: GM SPATIALLY WEIGHTED LEAST SQUARES (HET)
------------------------------------------------------------
Data set : unknown
Weights matrix :queen_contiguity
Dependent Variable :log_density_cfr_rec Number of Observations: 125
Mean dependent var : 0.0146 Number of Variables : 6
S.D. dependent var : 0.0161 Degrees of Freedom : 119
Pseudo R-squared : 0.2464
N. of iterations : 1 Step1c computed : No
------------------------------------------------------------------------------------
Variable Coefficient Std.Error z-Statistic Probability
------------------------------------------------------------------------------------
CONSTANT 0.01605 0.00193 8.33170 0.00000
altitud 0.02175 0.00914 2.37910 0.01735
temperatura 0.01928 0.00945 2.04110 0.04124
humedad_relativa -0.00127 0.00123 -1.03564 0.30037
urbanizacion 0.00166 0.00173 0.95863 0.33774
densidad 0.00478 0.00182 2.62630 0.00863
lambda 0.05693 0.02174 2.61928 0.00881
------------------------------------------------------------------------------------
================================ END OF REPORT =====================================
# Spatial error model estimated by maximum likelihood.
# Fit on the standardized covariates (`X_scaled`), like the other models in
# this notebook: with the raw `X` the coefficients are in original units, are
# not comparable across models, and tiny ones print as 0.00000 in the summary.
sem = spreg.ML_Error(
    y,
    X_scaled,
    w=w,
    name_y='log_density_cfr_rec',
    name_x=independent_vars,
    name_w='queen_contiguity',
)
print(sem.summary)
REGRESSION RESULTS
------------------
SUMMARY OF OUTPUT: ML SPATIAL ERROR (METHOD = full)
---------------------------------------------------
Data set : unknown
Weights matrix :queen_contiguity
Dependent Variable :log_density_cfr_rec Number of Observations: 125
Mean dependent var : 0.0146 Number of Variables : 6
S.D. dependent var : 0.0161 Degrees of Freedom : 119
Pseudo R-squared : 0.2458
Log likelihood : 358.0124
Sigma-square ML : 0.0002 Akaike info criterion : -704.025
S.E of regression : 0.0137 Schwarz criterion : -687.055
------------------------------------------------------------------------------------
Variable Coefficient Std.Error z-Statistic Probability
------------------------------------------------------------------------------------
CONSTANT -0.13108 0.09551 -1.37239 0.16994
altitud 0.00003 0.00001 2.16836 0.03013
temperatura 0.00561 0.00289 1.94133 0.05222
humedad_relativa -0.00026 0.00034 -0.74815 0.45437
urbanizacion 0.00879 0.00733 1.19882 0.23060
densidad 0.00000 0.00000 3.18338 0.00146
lambda 0.05508 0.02323 2.37076 0.01775
------------------------------------------------------------------------------------
================================ END OF REPORT =====================================
/usr/local/Caskroom/miniforge/base/envs/geo/lib/python3.11/site-packages/spreg/ml_error.py:184: RuntimeWarning: Method 'bounded' does not support relative tolerance in x; defaulting to absolute tolerance.
res = minimize_scalar(
# Spatial lag (SAR) model estimated by spatial two-stage least squares on the
# standardized covariates, with the queen weights defining the lag of y.
lag_labels = {
    'name_y': 'log_density_cfr_rec',
    'name_x': independent_vars,
    'name_w': 'queen_contiguity',
}
spatial_lag_model = spreg.GM_Lag(y, X_scaled, w=w, **lag_labels)
print(spatial_lag_model.summary)
REGRESSION RESULTS
------------------
SUMMARY OF OUTPUT: SPATIAL TWO STAGE LEAST SQUARES
--------------------------------------------------
Data set : unknown
Weights matrix :queen_contiguity
Dependent Variable :log_density_cfr_rec Number of Observations: 125
Mean dependent var : 0.0146 Number of Variables : 7
S.D. dependent var : 0.0161 Degrees of Freedom : 118
Pseudo R-squared : 0.2489
Spatial Pseudo R-squared: 0.2504
------------------------------------------------------------------------------------
Variable Coefficient Std.Error z-Statistic Probability
------------------------------------------------------------------------------------
CONSTANT 0.01473 0.00419 3.51910 0.00043
altitud 0.01809 0.01059 1.70716 0.08779
temperatura 0.01562 0.01004 1.55585 0.11974
humedad_relativa -0.00142 0.00144 -0.97976 0.32720
urbanizacion 0.00162 0.00139 1.16594 0.24364
densidad 0.00521 0.00162 3.20736 0.00134
W_log_density_cfr_rec -0.00244 0.05695 -0.04284 0.96583
------------------------------------------------------------------------------------
Instrumented: W_log_density_cfr_rec
Instruments: W_altitud, W_densidad, W_humedad_relativa, W_temperatura,
W_urbanizacion
DIAGNOSTICS FOR SPATIAL DEPENDENCE
TEST DF VALUE PROB
Anselin-Kelejian Test 1 0.432 0.5110
SPATIAL LAG MODEL IMPACTS
Impacts computed using the 'simple' method.
Variable Direct Indirect Total
altitud 0.0181 -0.0000 0.0180
temperatura 0.0156 -0.0000 0.0156
humedad_relativa -0.0014 0.0000 -0.0014
urbanizacion 0.0016 -0.0000 0.0016
densidad 0.0052 -0.0000 0.0052
================================ END OF REPORT =====================================