-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
2a1404d
commit a4c68c8
Showing
3 changed files
with
201 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
# -*- coding: utf-8 -*-
"""Download hadUK-Grid data from the CEDA archive.

Written by Barnaby Dobson, 5th Dec 2019.

Requires a CEDA account (https://services.ceda.ac.uk/cedasite/register/info/).
Edit the user-data section below, then run; one NetCDF file per period is
saved into ``output_folder``.
"""

import os

from tqdm import tqdm

import haduk_downloader_functions

# ---------------------------------------------------------------------------
# User data: username, password, output location and details of the download.
# ---------------------------------------------------------------------------
data = {'username': 'USER_NAME',
        'password': 'PASSWORD'}  # create an account here: https://services.ceda.ac.uk/cedasite/register/info/

output_folder = os.path.join("C:\\", "Users", "bdobson", "Downloads", "")
start_year = 1975
end_year = 1980  # NOTE(review): createDateseries adds 1, so end_year IS included — original comment claimed otherwise
grid_scale = 25  # km
period = 'day'  # 'day', 'mon' or 'ann'
variable = 'rainfall'  # tested 'rainfall' and 'tas' (temperature)

# ---------------------------------------------------------------------------
# Website information: site and file URLs; dates and file names.
# ---------------------------------------------------------------------------
site_url = 'https://auth.ceda.ac.uk/account/signin/'  # login page

file_url_base = 'http://dap.ceda.ac.uk/thredds/fileServer/badc/ukmo-hadobs/data/insitu/MOHC/HadOBS/HadUK-Grid/v1.0.1.0/'  # hadUK data base address
version = 'v20190808'  # data release to download

dates_series = haduk_downloader_functions.createDateseries(period, start_year, end_year)
file_name = haduk_downloader_functions.getFilename(variable, grid_scale, period)
file_location = haduk_downloader_functions.getFileweblocation(grid_scale, variable, period, version)

# ---------------------------------------------------------------------------
# Login.  If the first attempt fails (presumably a TLS verification error —
# the original code retried unconditionally), append the QuoVadis root
# certificate to certifi's bundle and retry once.
# ---------------------------------------------------------------------------
try:
    s = haduk_downloader_functions.startClient(data, site_url)
except Exception:  # was a bare `except:`, which also swallows KeyboardInterrupt/SystemExit
    # Available from https://www.quovadisglobal.com/QVRepository/DownloadRootsAndCRL.aspx
    # and translated to .pem following instructions in
    # https://incognitjoe.github.io/adding-certs-to-requests.html
    cert_location = 'qvevsslg3.pem'
    haduk_downloader_functions.addCertificate(cert_location)
    s = haduk_downloader_functions.startClient(data, site_url)

# ---------------------------------------------------------------------------
# Iterate over periods, downloading one NetCDF file per entry.
# ---------------------------------------------------------------------------
for period_text in tqdm(dates_series):
    file_url = file_url_base + file_location + period_text + '.nc'
    req = s.get(file_url)
    # Fail loudly on HTTP errors instead of silently writing an HTML error
    # page into a .nc file.
    req.raise_for_status()
    with open(os.path.join(output_folder, file_name + period_text + '.nc'), 'wb') as f:
        f.write(req.content)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
# -*- coding: utf-8 -*- | ||
""" | ||
Created on Thu Dec 5 09:03:38 2019 | ||
@author: bdobson | ||
""" | ||
import requests | ||
import pandas as pd | ||
import certifi | ||
|
||
def startClient(data,site_url):
    """Open an authenticated session with the CEDA sign-in page.

    Fetches the sign-in page to obtain the CSRF cookie, stores it in the
    credential dict (mutating *data*), then posts the login form with a
    matching Referer header.

    Returns the logged-in requests.Session.
    """
    session = requests.Session()
    session.get(site_url)
    # Django-style CSRF protection: token must appear in both cookie and form.
    data['csrfmiddlewaretoken'] = session.cookies['csrftoken']
    session.post(site_url, data=data, headers=dict(Referer=site_url))
    return session
|
||
def createDateseries(period,start_year,end_year):
    """Build the date labels embedded in hadUK-Grid file names.

    For ``period == 'day'`` there is one label per month of the form
    'YYYYMM01-YYYYMMDD' (DD = last day of the month); otherwise one label
    per year of the form 'YYYY01-YYYY12'.  The range covers start_year
    through end_year inclusive.
    """
    first = pd.to_datetime(start_year*100 + 1, format='%Y%m')
    stop = pd.to_datetime((end_year + 1)*100 + 1, format='%Y%m')
    if period == 'day':
        stamps = pd.date_range(first, stop, freq='m')
        return stamps.strftime('%Y%m') + '01-' + stamps.strftime('%Y%m%d')
    stamps = pd.date_range(first, stop, freq='Y')
    return stamps.strftime('%Y') + '01-' + stamps.strftime('%Y%m')
|
||
def getFilename(variable,grid_scale,period):
    """Return the leading part of a hadUK-Grid file name,
    e.g. 'rainfall_hadukgrid_uk_25km_day_'.
    """
    template = '%s_hadukgrid_uk_%dkm_%s_'
    return template % (variable, grid_scale, period)
|
||
def getFileweblocation(grid_scale,variable,period,version):
    """Return the path, below the hadUK base URL, of the requested data set,
    ending with the file-name stem produced by getFilename.
    """
    parts = (grid_scale, variable, period, version,
             getFilename(variable, grid_scale, period))
    return '%dkm/%s/%s/%s/%s' % parts
|
||
def addCertificate(cert_location):
    """Append a custom root certificate (a .pem file) to certifi's CA
    bundle so that requests can verify the server.
    """
    with open(cert_location, 'rb') as pem:
        extra = pem.read()
    with open(certifi.where(), 'ab') as bundle:
        bundle.write(extra)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
# -*- coding: utf-8 -*-
"""Aggregate hadUK-Grid data over the polygons of a shapefile.

Written by Barnaby Dobson, 5th Dec 2019.
Data must have already been downloaded (see download_haduk_data.py).

For each shape in the shapefile, the grid points falling inside it are
averaged per timestep and written to one CSV file.
"""

import os

import numpy as np  # was missing in the original: np.where/np.concatenate are used below
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
import xarray as xr
from tqdm import tqdm

import haduk_downloader_functions

# ---------------------------------------------------------------------------
# Enter addresses and input data.
# ---------------------------------------------------------------------------
shp_address = os.path.join("C:\\", "Users", "bdobson", "Downloads", "example.shp")
data_address = os.path.join("C:\\", "Users", "bdobson", "Downloads", "")
output_address = os.path.join("C:\\", "Users", "bdobson", "Downloads", "")

start_year = 1975
end_year = 1980
grid_scale = 25  # km
period = 'day'  # 'day', 'mon' or 'ann'
variable = 'rainfall'  # tested 'rainfall' and 'tas' (temperature)

name = 'zone_name'  # the 'name' column in the .shp file (or whatever column you want the output files to be named)

# ---------------------------------------------------------------------------
# Load shapefile and create file names.
# ---------------------------------------------------------------------------
shp = gpd.read_file(shp_address).reset_index()

dates_series = haduk_downloader_functions.createDateseries(period, start_year, end_year)
file_name = haduk_downloader_functions.getFilename(variable, grid_scale, period)

# ---------------------------------------------------------------------------
# Load data (needs netcdf4 installed).
# ---------------------------------------------------------------------------
data = []
for period_text in tqdm(dates_series):
    ds = xr.open_dataset(data_address + file_name + period_text + '.nc')
    data.append(ds.to_dataframe())
data = pd.concat(data)

# Remove grid points that have no data (presumably a large fill value marks
# missing cells — TODO confirm against the hadUK file metadata).
ind = data[variable] < 100000000
data = data.loc[ind].reset_index()

# ---------------------------------------------------------------------------
# Assign grid points to the different shapes in the shapefile (this is slow).
# ---------------------------------------------------------------------------
print('Assigning grid points')
grid_points = data[['latitude', 'longitude']].drop_duplicates()
points = []
for idx in tqdm(grid_points.index):
    row = grid_points.loc[idx]
    # Row indices of `data` belonging to this grid point, kept so the
    # aggregation loop below can slice `data` without re-searching.
    ind = np.where((data.latitude == row.latitude) & (data.longitude == row.longitude))
    points.append({'lon': row.longitude,
                   'lat': row.latitude,
                   'data_rows': ind,
                   'index': str(idx),
                   'geometry': Point(row.longitude, row.latitude)})
points = gpd.GeoDataFrame(points)
points = points.set_index('index')
points.crs = {'init': 'epsg:4326'}  # grid points are lat/lon (WGS84)
points = gpd.tools.sjoin(points, shp, how='left')

# ---------------------------------------------------------------------------
# Aggregate data and print to csv.
# ---------------------------------------------------------------------------
for i in shp['index']:
    point_ind = points[name] == shp.loc[i, name]
    # (removed a no-op statement here that selected data_rows and discarded it)
    # Flatten the per-point np.where tuples into one flat row-index array.
    in_geom = np.concatenate(np.concatenate(points.loc[point_ind, 'data_rows']))

    data_in_geom = data.iloc[in_geom].reset_index()
    data_in_geom = data_in_geom.groupby('time').mean()  # average over all points inside a shape
    data_in_geom['date'] = data_in_geom.index.date.tolist()
    data_in_geom = data_in_geom[['date', variable]]

    iname = shp.loc[shp['index'] == i, name].iloc[0]

    data_in_geom.to_csv(os.path.join(output_address,
                                     '_'.join([iname,
                                               variable,
                                               str(start_year),
                                               str(end_year),
                                               period,
                                               str(grid_scale),
                                               'km']) + ".csv"),
                        sep=',', index=False)