This repository has been archived by the owner on Aug 14, 2023. It is now read-only.
forked from lgarzio/ooi-tools-r
-
Notifications
You must be signed in to change notification settings - Fork 1
/
ooi_quickstart.R
169 lines (129 loc) · 8.1 KB
/
ooi_quickstart.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
# R Quickstart for OOI Data
# Modified by Lori Garzio, from Sage's Matlab Quickstart
# 6/14/18
# The OOI Data Portal provides quick and easy access to most of the
# datasets collected by the OOI program. In this tutorial, we will
# demonstrate how to programtically make a request for an OOI dataset and
# then load and plot the results, all using R.
# ---------------------Step 1 - Requesting OOI Data---------------------
# While you can use the OOI Data Portal to request data, sometimes it is
# easier to use the OOI M2M Interface (aka the OOI API) to request data
# from specific instruments, especially when you want to script your data
# processing routine. In addition, it's also useful when the number of
# datasets is more than the few it would be okay to do by hand.
# In order to use the OOI API, you will first need to create an account on
# the OOI Data Portal. Once you have done that, you will need to grab your
# API username and token, which can be found on your profile page. We will
# add them as variables to make it easy to refer to later.
API_USERNAME <- 'your-username';
API_TOKEN <- 'your-token';
# Making asynchronous requests through the API is essentially the same as
# requesting a download from the OOI Data Portal, but with the API you can
# easily create one or more requests in an automated way.
# To make a data request, we basically construct a URL using the reference
# designator, delivery method, stream name and other parameters. You can
# find this information in catalog on the Data Portal.
# The URL is constructured using the following format:
# sensor/inv/{subsite}/{node}/{sensor}/{method}/{stream}
# In order to make the code clear, we're going to setup several variables
# and concatenate all of the variables together with slashes.
site <- 'CP04OSSM'
node <- 'SBD11'
instrument <- '06-METBKA000'
method <- 'telemetered'
stream <- 'metbk_a_dcl_instrument'
api_base_url <- 'https://ooinet.oceanobservatories.org/api/m2m/12576/sensor/inv'
data_request_url <- sprintf('%s/%s/%s/%s/%s/%s', api_base_url, site, node, nstrument, method, stream)
# We can also add options to the data_request_url, such as start (beginDT)
# and ending date/time (endDT). By default, asynchronous requests will return
# NetCDF files, but you could also specify csv or json, using the format
# option. Optionally, you can also specify include_provenance and
# include_annotations, which will include separate text files in the
# output directory with that information. Let's add some of these options to
# the data_request_url.
beginDT <- '2016-01-01T00:00:00.000Z'
endDT <- '2017-01-01T00:00:00.000Z'
fmt <- 'application/netcdf'
incl_prov <- 'true'
incl_anno <- 'true'
data_request_url <- sprintf('%s?beginDT=%s&endDT=%s&format=%s&include_provenance=%s&include_annotations=%s',
data_request_url, beginDT, endDT, fmt, incl_prov, incl_anno)
# Now let's send the request. We do this here with the httr package,
# which is similar to Python's Requests library. (Note, you only need to
# send the request once to generate the data files. After which, we
# recommend commenting out the request lines to prevent accidental
# resubmission when running through the script again.
library(httr)
# response <- GET(data_request_url, authenticate(API_USERNAME, API_TOKEN))
# Now, let's read the JSON response to extract the URL where the data
# files can be found. The jsonlite package makes it easier to work
# with data in JSON format.
library(jsonlite)
info <- fromJSON((content(response, 'text')), flatten = TRUE)
# Which data URL should I use?
# The first URL in the allURLs key points to the THREDDS server, which
# allows for programmatic data access without downloading the entire file.
# The second URL in the allURLs key provides a direct link to a web
# server which you can use to quickly download files if you don't want
# to go through THREDDS.
# info$allURLs[1] = "https://opendap.oceanobservatories.org/thredds/catalog/ooi/[email protected]/20180611T191824-CP04OSSM-SBD11-06-METBKA000-telemetered-metbk_a_dcl_instrument/catalog.html"
# info$allURLs[2] = "https://opendap.oceanobservatories.org/async_results/[email protected]/20180611T191824-CP04OSSM-SBD11-06-METBKA000-telemetered-metbk_a_dcl_instrument"
# -------------------Step 2 - Loading in NetCDF Files-------------------
# Once the dataset is ready on the server, we can start using it.
# For this example, it turns out that there are 3 different deployments
# spanning 2016, and each deployment has its own file. In order to load
# in all of the data, we will need to concatenate the data from all of the
# files. The easiest way to do this is to create a vector of all of the
# data files and then loop through it. Using the THREDDS catalog link (the
# first url above), click on each metbk .nc file, then click on the OPENDAP
# link, and then copy the "Data URL" and add that to your vector.
urls <- c('https://opendap.oceanobservatories.org/thredds/dodsC/ooi/[email protected]/20180611T191824-CP04OSSM-SBD11-06-METBKA000-telemetered-metbk_a_dcl_instrument/deployment0003_CP04OSSM-SBD11-06-METBKA000-telemetered-metbk_a_dcl_instrument_20160101T000102.577000-20160513T150947.552000.nc',
'https://opendap.oceanobservatories.org/thredds/dodsC/ooi/[email protected]/20180611T191824-CP04OSSM-SBD11-06-METBKA000-telemetered-metbk_a_dcl_instrument/deployment0004_CP04OSSM-SBD11-06-METBKA000-telemetered-metbk_a_dcl_instrument_20160527T155046.777000-20161012T150931.977000.nc',
'https://opendap.oceanobservatories.org/thredds/dodsC/ooi/[email protected]/20180611T191824-CP04OSSM-SBD11-06-METBKA000-telemetered-metbk_a_dcl_instrument/deployment0005_CP04OSSM-SBD11-06-METBKA000-telemetered-metbk_a_dcl_instrument_20161012T142641.906000-20161231T235909.571000.nc')
# Now we can loop over each file and load the parameters we want. Here we
# use the ncdf4 library to read the files. nc_open(filename) dumps the
# contents of the file. Once a file is open, names(f$var) lists the
# variable names available in the file. You can also get more information
# about the variables in the file using Panoply.
library(ncdf4)
# Setup empty vectors
dtime <- c()
air_temperature <- c()
sea_surface_temperature <- c()
for (jj in 1:length(urls)){
f <- nc_open(urls[jj])
dtime <- c(dtime, ncvar_get(f, 'time'))
air_temperature <- c(air_temperature, ncvar_get(f, 'air_temperature'))
sea_surface_temperature <- c(sea_surface_temperature, ncvar_get(f, 'sea_surface_temperature'))
}
# Convert timestamp to date
dtime <- as.POSIXct(dtime, origin = '1900-01-01', tz = "UTC")
# Because this data set is so dense, we are going to calculate hourly
# averages to make plotting more manageable. Use the lubridate package
# to round the date/times to the nearest hour, and create a dataframe
# with the aggregated dataset.
library(lubridate)
rtime_hour <- round_date(dtime, unit = "hour")
havg <- setNames(aggregate(cbind(air_temperature, sea_surface_temperature), list(rtime_hour), mean, na.rm = TRUE),
c("time_hh", "air_temp_mean", "sea_surf_temp_mean"))
# ---------------------Step 3 - Plotting the Results---------------------
# Note: plotly files can be saved as interactive web-based graphs.
# See https://plot.ly/r/getting-started/
# However here we're saving the plots as local .png files using the export()
# function with the webshot package: https://github.com/wch/webshot/
library(plotly)
library(webshot)
# Pull the source attribute to use as the plot title
source <- ncatt_get(f, 0, 'source')$value
xax <- list(showline = TRUE)
yax <- list(title = 'Hourly-Averaged Temperature (C)', showline = TRUE, zeroline = FALSE)
mk1 <- list(size = 3, color = 'red')
mk2 <- list(size = 3, color = 'blue')
p <- plot_ly(havg, x = havg$time_hh, y = havg$air_temp_mean, name = 'Air Temperature',
type = "scatter", mode = "markers", marker = mk1) %>%
add_trace(y = havg$sea_surf_temp_mean, name = 'Sea Surface Temperature',
type = "scatter", mode = "markers", marker = mk2) %>%
layout(title = source, xaxis = xax, yaxis = yax, legend = list(x = 0.7, y = 0.05))
p
sname <- sprintf('./%s_timeseries.png', source)
export(p, file = sname)