From dbaf0455b989f20ba5afa0f65e204143f3dc712d Mon Sep 17 00:00:00 2001 From: Jose Bayona Date: Tue, 27 Feb 2024 15:38:58 +0000 Subject: [PATCH] Added two functions that map GEAR1 onto geographical regions and read forecasts in CSEP format, respectively --- csep/utils/readers.py | 227 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 225 insertions(+), 2 deletions(-) diff --git a/csep/utils/readers.py b/csep/utils/readers.py index 37098574..7f24f7be 100644 --- a/csep/utils/readers.py +++ b/csep/utils/readers.py @@ -1,4 +1,4 @@ -import datetime +import datetime, time import math import re import warnings @@ -884,4 +884,227 @@ def quadtree_csv_loader(csv_fname): rates = rates.astype(float) region = QuadtreeGrid2D.from_quadkeys(quadkeys, magnitudes=mws) - return rates, region, mws \ No newline at end of file + return rates, region, mws + +def mapping_GEAR1(GEAR1_file, area_file, grid_file, b_value=False): + """ Projects and extrapolates annual estimates of M 5.95+, depth <= 70 km global seismicity, + provided by the Global Earthquake Activity Rate (GEAR1) model, onto any geographical region + provided its longitude and latitude coordinates and a regional mean b-value. + + + Args: + GEAR1_file (str): Original text file containing GEAR1 annual M 5.95+ earthquake rates. + + area_file (str): Text file containing calculated areas (in m2) of every 0.1 x 0.1 cell + on Earth. These estimates are useful to report expected number of earth- + quakes per unit time per unit area. + + grid_file (str): Two-column, n-row array containing the centered longitude and latitude + coordinates of all the cells (with spatial resolution of 0.1 x 0.1) that + make up the new testing region. E.g. 25.15, 33.25 + 25.25, 33.25 + 25.35, 33.25 + Each lon lat coordinate must be a two-decimal floating number ending in 5, + i.e., the cell midpoint, as seen above. + + b_value (float): If modellers wish to extroplate GEAR1 M 5.95+ earthquake rates to a lower + magnitude threshold, they must provide a generic b value of the region. + The default magnitude threshold for extrapolating global rates is 4.95. + Conversely, if modelers do not wish to extrapolate earthquake rates to + lower magnitudes but only project GEAR1 over a geographic region, the + b_value variable should remain False. + + Returns + GEAR1_region.dat: Input text file containing GEAR 1 earthquake rates in cells defined within + the desired geographic region. This file feeds a so-called read_GEAR1_format + function, which translates the format in which the GEAR1 forecasts were + originally provided into a pyCSEP-friendly format. + + area_region.dat: Input text file containing the areas of all the cells defined within the + geographical region. This file also feeds the read_GEAR1_format function + and is aimed to express earthquake rates densities as expected number of + earthquakes per year per m2. + + + """ + print ('Reading data...') + bulk_dataW = np.loadtxt(GEAR1_file, skiprows=1, delimiter=',') + bulk_areaW = np.loadtxt(area_file, skiprows=1, delimiter=',') + + # This part of the code is aimed to ensure that all lon and lat coordinates are two-digits floating + # numbers. This is important, because the projection of GEAR1 onto a geographical region is basically + # the intersection between two Pandas data frames. + + latitudesW = [] + longitudesW = [] + + for i in range(len(bulk_dataW)): + longitudesW.append(np.float('%.2f' % round(bulk_dataW[:,0][i],2))) + latitudesW.append(np.float('%.2f' % round(bulk_dataW[:,1][i],2))) + + # This is the first Pandas data frame when no extrapolations are needed: + if not b_value: + GEAR1 = pd.DataFrame() + GEAR1['longitude'] = longitudesW + GEAR1['latitude'] = latitudesW + GEAR1['m595'] = bulk_dataW[:,2] + GEAR1['m605'] = bulk_dataW[:,3] + GEAR1['m615'] = bulk_dataW[:,4] + GEAR1['m625'] = bulk_dataW[:,5] + GEAR1['m635'] = bulk_dataW[:,6] + GEAR1['m645'] = bulk_dataW[:,7] + GEAR1['m655'] = bulk_dataW[:,8] + GEAR1['m665'] = bulk_dataW[:,9] + GEAR1['m675'] = bulk_dataW[:,10] + GEAR1['m685'] = bulk_dataW[:,11] + GEAR1['m695'] = bulk_dataW[:,12] + GEAR1['m705'] = bulk_dataW[:,13] + GEAR1['m715'] = bulk_dataW[:,14] + GEAR1['m725'] = bulk_dataW[:,15] + GEAR1['m735'] = bulk_dataW[:,16] + GEAR1['m745'] = bulk_dataW[:,17] + GEAR1['m755'] = bulk_dataW[:,18] + GEAR1['m765'] = bulk_dataW[:,19] + GEAR1['m775'] = bulk_dataW[:,20] + GEAR1['m785'] = bulk_dataW[:,21] + GEAR1['m795'] = bulk_dataW[:,22] + GEAR1['m805'] = bulk_dataW[:,23] + GEAR1['m815'] = bulk_dataW[:,24] + GEAR1['m825'] = bulk_dataW[:,25] + GEAR1['m835'] = bulk_dataW[:,26] + GEAR1['m845'] = bulk_dataW[:,27] + GEAR1['m855'] = bulk_dataW[:,28] + GEAR1['m865'] = bulk_dataW[:,29] + GEAR1['m875'] = bulk_dataW[:,30] + GEAR1['m885'] = bulk_dataW[:,31] + GEAR1['m895'] = bulk_dataW[:,32] + + # But if extrapolations are indeed desired, this is then the first Pandas data frame: + else: + bv = b_value + GEAR1 = pd.DataFrame() + GEAR1['longitude'] = longitudesW + GEAR1['latitude'] = latitudesW + GEAR1['m495'] = bulk_dataW[:,2] / (10**(-bv * (5.95 - 4.95))) + GEAR1['m505'] = bulk_dataW[:,2] / (10**(-bv * (5.95 - 5.05))) + GEAR1['m515'] = bulk_dataW[:,2] / (10**(-bv * (5.95 - 5.15))) + GEAR1['m525'] = bulk_dataW[:,2] / (10**(-bv * (5.95 - 5.25))) + GEAR1['m535'] = bulk_dataW[:,2] / (10**(-bv * (5.95 - 5.35))) + GEAR1['m545'] = bulk_dataW[:,2] / (10**(-bv * (5.95 - 5.45))) + GEAR1['m555'] = bulk_dataW[:,2] / (10**(-bv * (5.95 - 5.55))) + GEAR1['m565'] = bulk_dataW[:,2] / (10**(-bv * (5.95 - 5.65))) + GEAR1['m575'] = bulk_dataW[:,2] / (10**(-bv * (5.95 - 5.75))) + GEAR1['m585'] = bulk_dataW[:,2] / (10**(-bv * (5.95 - 5.85))) + GEAR1['m595'] = bulk_dataW[:,2] + GEAR1['m605'] = bulk_dataW[:,3] + GEAR1['m615'] = bulk_dataW[:,4] + GEAR1['m625'] = bulk_dataW[:,5] + GEAR1['m635'] = bulk_dataW[:,6] + GEAR1['m645'] = bulk_dataW[:,7] + GEAR1['m655'] = bulk_dataW[:,8] + GEAR1['m665'] = bulk_dataW[:,9] + GEAR1['m675'] = bulk_dataW[:,10] + GEAR1['m685'] = bulk_dataW[:,11] + GEAR1['m695'] = bulk_dataW[:,12] + GEAR1['m705'] = bulk_dataW[:,13] + GEAR1['m715'] = bulk_dataW[:,14] + GEAR1['m725'] = bulk_dataW[:,15] + GEAR1['m735'] = bulk_dataW[:,16] + GEAR1['m745'] = bulk_dataW[:,17] + GEAR1['m755'] = bulk_dataW[:,18] + GEAR1['m765'] = bulk_dataW[:,19] + GEAR1['m775'] = bulk_dataW[:,20] + GEAR1['m785'] = bulk_dataW[:,21] + GEAR1['m795'] = bulk_dataW[:,22] + GEAR1['m805'] = bulk_dataW[:,23] + GEAR1['m815'] = bulk_dataW[:,24] + GEAR1['m825'] = bulk_dataW[:,25] + GEAR1['m835'] = bulk_dataW[:,26] + GEAR1['m845'] = bulk_dataW[:,27] + GEAR1['m855'] = bulk_dataW[:,28] + GEAR1['m865'] = bulk_dataW[:,29] + GEAR1['m875'] = bulk_dataW[:,30] + GEAR1['m885'] = bulk_dataW[:,31] + GEAR1['m895'] = bulk_dataW[:,32] + + area = pd.DataFrame() + area['longitude'] = longitudesW + area['latitude'] = latitudesW + area['area'] = bulk_areaW[:,2] + + # This is simply an artefact to release RAM memory for further computations: + bulk_dataW = [] + latitudesW = [] + longitudesW = [] + + # This is the second Pandas data frame: + bulk_dataR = np.loadtxt(grid_file, skiprows=0, delimiter=' ') + + grid_longitudes = [] + grid_latitudes = [] + + for i in range(len(bulk_dataR)): + grid_longitudes.append(np.float('%.2f' % round(bulk_dataR[:,0][i],2))) + grid_latitudes.append(np.float('%.2f' % round(bulk_dataR[:,1][i],2))) + + polygon = pd.DataFrame() + polygon['longitude'] = grid_longitudes + polygon['latitude'] = grid_latitudes + + # And this is how the intersection between both data frames is carried out: + GEAR1_region = pd.merge(polygon, GEAR1, how="inner", on=['longitude', 'latitude']) + area_region = pd.merge(polygon, area, how="inner", on=['longitude', 'latitude']) + + GEAR1_region.to_csv('GEAR1_region.dat') + area_region.to_csv('areas_region.dat') + + print ('The output files have been stored successfully.') + + +def read_GEAR1_format(filename, area_filename, magnitudes): + """ Reads the format in which GEAR1 forecasts were originally created to translate them into + a format that is readable by pyCSEP. This function is designed to be used as a loader for + the GriddedForecast.from_custom function. + + + Args: + filename (str): Text file containing GEAR1 seismicity rates in original format. This + file is the one of the output products of the mapping_GEAR1 function. + + area_filename (str): Text file containing calculated areas (in m2) of 0.1 lon x 0.1 lat cells. + This file is also an output of the mapping_GEAR1 function. + + magnitudes (array): Array of magnitude bins in which the seismicity forecast is defined. + E.g., [3.95, 4.05, 4.15, ..., 8.95] + + Returns: + :class:`csep.core.forecasts.GriddedForecast` + """ + t0 = time.time() + bulk_data = np.loadtxt(filename, skiprows=1, delimiter=',') + + # Construction of the testing region: + lons = bulk_data[:,1] + lats = bulk_data[:,2] + coords = np.column_stack([lons, lats]) + + # Coordinates are given as midpoints origin should be in the 'lower left' corner: + r = CartesianGrid2D.from_origins(coords, magnitudes=magnitudes) + + # Shape: (num_space_bins, num_mag_bins) + bulk_data_no_coords = bulk_data[:, 3:] + + # Original GEAR1 format provides cumulative rates per meter**2 + incremental_yrly_density = np.diff(np.fliplr(bulk_data_no_coords)) + + # Computing the differences, but returning array with the same shape: + incremental_yrly_density = np.column_stack([np.fliplr(incremental_yrly_density), bulk_data_no_coords[:,-1]]) + + # Read in area to denormalize back onto csep grid + area = np.loadtxt(area_filename, skiprows=1, delimiter=',') + + # Allows to use broadcasting + m2_per_cell = np.reshape(area[:,-1], (len(area[:,1]), 1)) + incremental_yrly_rates = incremental_yrly_density * m2_per_cell + + return incremental_yrly_rates, r, magnitudes