Skip to content

Commit

Permalink
integrate smaup into tobler
Browse files Browse the repository at this point in the history
- wrapper function over esda smaup
- just tell user whether or not variable is affected by MAUP, if they want more info they should run it themselves for now
- allow user to pass own regions and weights object in smaup_kwds
- begin diagnostics module
- only import warn from warnings
  • Loading branch information
AnGWar26 committed Mar 30, 2021
1 parent 32c8525 commit 449bc14
Show file tree
Hide file tree
Showing 7 changed files with 87 additions and 2 deletions.
1 change: 1 addition & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,4 @@ dependencies:
- mapclassify
- descartes
- joblib
- esda
1 change: 1 addition & 0 deletions tobler/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@
from . import dasymetric
from . import model
from . import util
from . import diagnostics
19 changes: 17 additions & 2 deletions tobler/area_weighted/area_interpolate.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,13 @@
import numpy as np
import geopandas as gpd
from ._vectorized_raster_interpolation import _fast_append_profile_in_gdf
import warnings
from warnings import warn
from scipy.sparse import dok_matrix, diags, coo_matrix
import pandas as pd
import os

from tobler.util.util import _check_crs, _nan_check, _inf_check, _check_presence_of_crs
from tobler.diagnostics import _smaup


def _chunk_dfs(geoms_to_chunk, geoms_full, n_jobs):
Expand Down Expand Up @@ -268,6 +269,7 @@ def _area_interpolate_binning(
spatial_index="auto",
n_jobs=1,
categorical_variables=None,
smaup_kwds=None
):
"""
Area interpolation for extensive, intensive and categorical variables.
Expand Down Expand Up @@ -307,6 +309,11 @@ def _area_interpolate_binning(
of `pygeos` and `geopandas`.
categorical_variables : list
[Optional. Default=None] Columns in dataframes for categorical variables
smaup_kwds : dict
[Optional. Default = None] Keyword arguments for tobler's smaup wrapper
Requires the following values:
int: 'k' for number of regions to be tested and
libpysal.weights: 'w' to calculate Moran's I.
Returns
-------
Expand Down Expand Up @@ -348,6 +355,14 @@ def _area_interpolate_binning(
source_df = source_df.copy()
target_df = target_df.copy()

if smaup_kwds is not None:
for var in intensive_variables,extensive_variables:
stat = _smaup(smaup_kwds["k"], source_df[var].to_numpy(), smaup_kwds["w"])
if stat.summary.find('H0 is rejected'):
warn(f"{var} is affected by the MAUP. Interpolations of this variable may not be accourate!")
else:
print(f"{var} is not affected by the MAUP.")

if _check_crs(source_df, target_df):
pass
else:
Expand Down Expand Up @@ -614,7 +629,7 @@ def _area_tables_raster(
res_union_pre = gpd.overlay(source_df, target_df, how="union")

# Establishing a CRS for the generated union
warnings.warn(
warn(
"The CRS for the generated union will be set to be the same as source_df."
)
res_union_pre.crs = source_df.crs
Expand Down
17 changes: 17 additions & 0 deletions tobler/dasymetric/masked_area_interpolate.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

from ..area_weighted._vectorized_raster_interpolation import *

from tobler.diagnostics import _smaup
from warnings import warn


def masked_area_interpolate(
source_df,
Expand All @@ -13,6 +16,7 @@ def masked_area_interpolate(
intensive_variables=None,
allocate_total=True,
tables=None,
smaup_kwds=None,
):
"""Interpolate data between two polygonal datasets using an auxiliary raster to mask out uninhabited land.
Expand All @@ -38,6 +42,11 @@ def masked_area_interpolate(
whether to allocate the total from the source geometries (the default is True).
tables : tuple of two numpy.array (optional)
As generated from `tobler.area_weighted.area_tables_raster` (the default is None).
smaup_kwds : dict
[Optional. Default = None] Keyword arguments for tobler's smaup wrapper
Requires the following values:
int: 'k' for number of regions to be tested and
libpysal.weights: 'w' to calculate Moran's I.
Returns
-------
Expand All @@ -53,6 +62,14 @@ def masked_area_interpolate(
"You must pass the path to a raster that can be read with rasterio"
)

if smaup_kwds is not None:
for var in intensive_variables,extensive_variables:
stat = _smaup(smaup_kwds["k"], source_df[var].to_numpy(), smaup_kwds["w"])
if stat.summary.find('H0 is rejected'):
warn(f"{var} is affected by the MAUP. Interpolations of this variable may not be accourate!")
else:
print(f"{var} is not affected by the MAUP.")

if not tables:
tables = _area_tables_raster(
source_df,
Expand Down
1 change: 1 addition & 0 deletions tobler/diagnostics/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .smaup import _smaup
35 changes: 35 additions & 0 deletions tobler/diagnostics/smaup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
"""
A wrapper for using the S-maup statistical test in tobler interpolation
"""

from esda.smaup import Smaup
from esda.moran import Moran

def _smaup(k, y, w,):
"""
A function that wraps esda's smaup function and automates some of the process of calculating smaup.
For more about smaup read here: https://pysal.org/esda/generated/esda.Smaup.html#esda.Smaup
Parameters
----------
k : int
number of regions
y : array
data for autocorellation calculation
w : libpysal.weights object
pysal spatial weights object for autocorellation calculation
Returns
-------
esda.smaup.Smaup:
statistic that contains information regarding the extent to which the variable is affected by the MAUP.
"""
rho = Moran(y, w).I
n = len(y)
stat = Smaup(n,k,rho)
return stat

15 changes: 15 additions & 0 deletions tobler/model/glm.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
_return_weights_from_regression,
)
from tobler.util import project_gdf
from tobler.diagnostics import _smaup


def glm_pixel_adjusted(
Expand Down Expand Up @@ -100,6 +101,7 @@ def glm(
likelihood="poisson",
force_crs_match=True,
return_model=False,
smaup_kwds=None
):
"""Estimate interpolated values using raster data as input to a generalized linear model.
Expand Down Expand Up @@ -130,6 +132,11 @@ def glm(
return model : bool
whether to return the fitted model in addition to the interpolated geodataframe.
If true, this will return (geodataframe, model)
smaup_kwds : dict
[Optional. Default = None] Keyword arguments for tobler's smaup wrapper
Requires the following values:
int: 'k' for number of regions to be tested and
libpysal.weights: 'w' to calculate Moran's I.
Returns
--------
Expand All @@ -143,6 +150,14 @@ def glm(
source_df = source_df.copy()
target_df = target_df.copy()
_check_presence_of_crs(source_df)

if smaup_kwds is not None:
stat = _smaup(smaup_kwds["k"], source_df[variable].to_numpy(), smaup_kwds["w"])
if stat.summary.find('H0 is rejected'):
warn(f"{variable} is affected by the MAUP. Interpolations of this variable may not be accourate!")
else:
print(f"{variable} is not affected by the MAUP.")

liks = {"poisson": Poisson, "gaussian": Gaussian, "neg_binomial": NegativeBinomial}

if likelihood not in liks.keys():
Expand Down

0 comments on commit 449bc14

Please sign in to comment.