diff --git a/esda/crand.py b/esda/crand.py index 523cfbd4..235dbc3b 100644 --- a/esda/crand.py +++ b/esda/crand.py @@ -89,8 +89,15 @@ def crand( Spatial weights object observed : ndarray (N,) array with observed values - permutations : int - Number of permutations for conditional randomisation + permutations : int, np.ndarray + Number of permutations for conditional randomisation, or the permutation array itself. Providing an integer will test the + conditional random null hypothesis for each site. Permutations + might be specified as an array of indices if the user needs to add + structure to this conditional permutation null hypothesis. Common + reasons to do this include exchangeability violations, which might + then require us to shuffle observations within (but not between) + groups, or linearity constraints, which may require certain + sequences of observation relationships to be preserved. keep : Boolean If True, store simulation; else do not return randomised statistics n_jobs : int @@ -170,8 +177,40 @@ def crand( # self neighbor, since conditional randomization conditions on site i. cardinalities = np.array((adj_matrix != 0).sum(1)).flatten() max_card = cardinalities.max() - permuted_ids = vec_permutations(max_card, n, permutations, seed) + if np.ndim(permutations) == 0: + # Random permutation array + if int(permutations) != permutations: + raise ValueError( + f"An integer number of permutations is required, but {permutations} was provided." + ) + permuted_ids = vec_permutations(max_card, n, permutations, seed) + elif np.ndim(permutations) == 2: + # User defined permutation array + permuted_ids = permutations + if permuted_ids.shape[0] != permutations: + permutations = permuted_ids.shape[0] + warnings.warn( + f"Number of permutations has been adjusted to match the length of the " + f"permutations array. New value of 'permutations' is {permutations}.", + stacklevel=2, + ) + if permuted_ids.shape[1] < max_card: + raise ValueError( + f"The `permutations` provided were shape {permuted_ids.shape}" + f", but must be ({permutations, max_card}) in order to supply" + f" enough possible neighbors to shuffle every observation." + f" Consider supplying a wider permutation matrix, with" + f" {max_card-permuted_ids.shape[1]} more columns." + ) + else: + raise ValueError( + f"The `permutations` argument must be either an integer, or a" + f" two-dimensional numpy array of shape (p,k), where p is the" + f" number of permutations to run and k is the largest amount of" + f" shuffled pairs that must be considered at a given site, but" + f" {permutations} was provided." + ) if n_jobs != 1: try: import joblib # noqa: F401 diff --git a/esda/geary_local.py b/esda/geary_local.py index 584bd55e..7543307a 100644 --- a/esda/geary_local.py +++ b/esda/geary_local.py @@ -42,10 +42,15 @@ def __init__( (default=0.05) Default significance threshold used for creation of labels groups. - permutations : int - (default=999) - number of random permutations for calculation - of pseudo p_values + permutations : int, np.ndarray + Number of permutations for conditional randomisation, or the permutation array itself. Providing an integer will test the + conditional random null hypothesis for each site. Permutations + might be specified as an array of indices if the user needs to add + structure to this conditional permutation null hypothesis. Common + reasons to do this include exchangeability violations, which might + then require us to shuffle observations within (but not between) + groups, or linearity constraints, which may require certain + sequences of observation relationships to be preserved. n_jobs : int (default=1) Number of cores to be used in the conditional diff --git a/esda/getisord.py b/esda/getisord.py index 3862468e..a89ed102 100644 --- a/esda/getisord.py +++ b/esda/getisord.py @@ -257,9 +257,15 @@ class G_Local: # noqa: N801 spatial weights instance as W or Graph aligned with y transform : {'R', 'B'} the type of w, either 'B' (binary) or 'R' (row-standardized) - permutations : int - the number of random permutations for calculating - pseudo p values + permutations : int, np.ndarray + Number of permutations for conditional randomisation, or the permutation array itself. Providing an integer will test the + conditional random null hypothesis for each site. Permutations + might be specified as an array of indices if the user needs to add + structure to this conditional permutation null hypothesis. Common + reasons to do this include exchangeability violations, which might + then require us to shuffle observations within (but not between) + groups, or linearity constraints, which may require certain + sequences of observation relationships to be preserved. star : boolean or float whether or not to include focal observation in sums (default: False) if the row-transformed weight is provided, then this is the default diff --git a/esda/join_counts_local.py b/esda/join_counts_local.py index 86277958..da0d2fab 100644 --- a/esda/join_counts_local.py +++ b/esda/join_counts_local.py @@ -30,9 +30,15 @@ def __init__( ---------- connectivity : W | Graph spatial weights instance as W or Graph aligned with y - permutations : int - number of random permutations for calculation of pseudo - p_values + permutations : int, np.ndarray + Number of permutations for conditional randomisation, or the permutation array itself. Providing an integer will test the + conditional random null hypothesis for each site. Permutations + might be specified as an array of indices if the user needs to add + structure to this conditional permutation null hypothesis. Common + reasons to do this include exchangeability violations, which might + then require us to shuffle observations within (but not between) + groups, or linearity constraints, which may require certain + sequences of observation relationships to be preserved. n_jobs : int Number of cores to be used in the conditional randomisation. If -1, all available cores are used. diff --git a/esda/join_counts_local_bv.py b/esda/join_counts_local_bv.py index 69979b8c..88a21996 100644 --- a/esda/join_counts_local_bv.py +++ b/esda/join_counts_local_bv.py @@ -30,8 +30,15 @@ def __init__( ---------- connectivity : W | Graph spatial weights instance as W or Graph aligned with y - permutations : int - number of random permutations for calculation of pseudo p_values + permutations : int, np.ndarray + Number of permutations for conditional randomisation, or the permutation array itself. Providing an integer will test the + conditional random null hypothesis for each site. Permutations + might be specified as an array of indices if the user needs to add + structure to this conditional permutation null hypothesis. Common + reasons to do this include exchangeability violations, which might + then require us to shuffle observations within (but not between) + groups, or linearity constraints, which may require certain + sequences of observation relationships to be preserved. n_jobs : int Number of cores to be used in the conditional randomisation. If -1, all available cores are used. diff --git a/esda/join_counts_local_mv.py b/esda/join_counts_local_mv.py index e9e94f25..fab44daf 100644 --- a/esda/join_counts_local_mv.py +++ b/esda/join_counts_local_mv.py @@ -30,8 +30,15 @@ def __init__( ---------- connectivity : W | Graph spatial weights instance as W or Graph aligned with y - permutations : int - number of random permutations for calculation of pseudo p_values + permutations : int, np.ndarray + Number of permutations for conditional randomisation, or the permutation array itself. Providing an integer will test the + conditional random null hypothesis for each site. Permutations + might be specified as an array of indices if the user needs to add + structure to this conditional permutation null hypothesis. Common + reasons to do this include exchangeability violations, which might + then require us to shuffle observations within (but not between) + groups, or linearity constraints, which may require certain + sequences of observation relationships to be preserved. n_jobs : int Number of cores to be used in the conditional randomisation. If -1, all available cores are used. diff --git a/esda/moran.py b/esda/moran.py index 79a32c6b..84deaa8c 100644 --- a/esda/moran.py +++ b/esda/moran.py @@ -891,9 +891,15 @@ class Moran_Local: # noqa: N801 "D": doubly-standardized, "U": untransformed (general weights), "V": variance-stabilizing. - permutations : int - number of random permutations for calculation of pseudo - p_values + permutations : int, np.ndarray + Number of permutations for conditional randomisation, or the permutation array itself. Providing an integer will test the + conditional random null hypothesis for each site. Permutations + might be specified as an array of indices if the user needs to add + structure to this conditional permutation null hypothesis. Common + reasons to do this include exchangeability violations, which might + then require us to shuffle observations within (but not between) + groups, or linearity constraints, which may require certain + sequences of observation relationships to be preserved. geoda_quads : boolean (default=False) If True use GeoDa scheme: HH=1, LL=2, LH=3, HL=4 @@ -1265,9 +1271,15 @@ class Moran_Local_BV: # noqa: N801 "D": doubly-standardized, "U": untransformed (general weights), "V": variance-stabilizing. - permutations : int - number of random permutations for calculation of pseudo - p_values + permutations : int, np.ndarray + Number of permutations for conditional randomisation, or the permutation array itself. Providing an integer will test the + conditional random null hypothesis for each site. Permutations + might be specified as an array of indices if the user needs to add + structure to this conditional permutation null hypothesis. Common + reasons to do this include exchangeability violations, which might + then require us to shuffle observations within (but not between) + groups, or linearity constraints, which may require certain + sequences of observation relationships to be preserved. geoda_quads : boolean (default=False) If True use GeoDa scheme: HH=1, LL=2, LH=3, HL=4 @@ -1406,6 +1418,7 @@ def __init__( w, self.Is, permutations, + None, keep_simulations, n_jobs=n_jobs, stat_func=_moran_local_bv_crand, @@ -1535,9 +1548,15 @@ class Moran_Local_Rate(Moran_Local): # noqa: N801 "D": doubly-standardized, "U": untransformed (general weights), "V": variance-stabilizing. - permutations : int - number of random permutations for calculation of pseudo - p_values + permutations : int, np.ndarray + Number of permutations for conditional randomisation, or the permutation array itself. Providing an integer will test the + conditional random null hypothesis for each site. Permutations + might be specified as an array of indices if the user needs to add + structure to this conditional permutation null hypothesis. Common + reasons to do this include exchangeability violations, which might + then require us to shuffle observations within (but not between) + groups, or linearity constraints, which may require certain + sequences of observation relationships to be preserved. geoda_quads : boolean (default=False) If True use GeoDa scheme: HH=1, LL=2, LH=3, HL=4 diff --git a/esda/tests/test_moran.py b/esda/tests/test_moran.py index c35d4009..f7d65ba7 100644 --- a/esda/tests/test_moran.py +++ b/esda/tests/test_moran.py @@ -162,6 +162,7 @@ def setup_method(self): f = libpysal.io.open(libpysal.examples.get_path("desmith.txt")) self.y = np.array(f.by_col["z"]) + @parametrize_desmith def test_Moran_Local(self, w): lm = moran.Moran_Local( @@ -174,6 +175,29 @@ def test_Moran_Local(self, w): ) np.testing.assert_allclose(lm.z_sim[0], -0.6990291160835514) np.testing.assert_allclose(lm.p_z_sim[0], 0.24226691753791396) + + @parametrize_desmith + def test_Moran_Local_custom_perms(self, w): + np.random.seed(SEED) + cardinalities = np.array((w.sparse != 0).sum(1)).flatten() + max_card = cardinalities.max() + original_array = np.arange(len(self.y)) + permutations_array = np.zeros((99, max_card), dtype=np.int32) + + for i in range(99): + random_number = np.random.randint(0, len(self.y) - max_card) + permutations_array[i] = original_array[random_number:random_number + max_card] + + lm = moran.Moran_Local( + self.y, + w, + transformation="r", + permutations=permutations_array, + keep_simulations=True, + seed=SEED, + ) + np.testing.assert_allclose(lm.z_sim[0], -0.49229215590070813) + np.testing.assert_allclose(lm.p_z_sim[0], 0.311256412279757) @parametrize_sac def test_Moran_Local_labels(self, w):