-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmanipulate.Rmd
147 lines (111 loc) · 2.18 KB
/
manipulate.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
---
title: "Manipulate"
author: "Catalina Roman"
date: "28-06-2021"
output: html_document
editor_options:
  chunk_output_type: console
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
## Read online table
### Download table ("\*.csv")
```{r}
# Set variables: URL of the source table and the local data folder
csv_url <- "https://oceanview.pfeg.noaa.gov/erddap/tabledap/cciea_AC.csv"
dir_data <- "data"

# Derived variable: local path of the downloaded file (folder + file name
# taken from the URL)
csv <- file.path(dir_data, basename(csv_url))

# Create the data directory; showWarnings = FALSE keeps re-runs quiet when
# the directory already exists
dir.create(dir_data, showWarnings = FALSE)

# Download the file to the local path
download.file(csv_url, csv)
```
## Read table `read.csv()`
```{r}
# First attempt: read the csv as-is.
# NOTE(review): the later `skip = 2` implies the file carries a second,
# non-data header line (presumably units) -- confirm against the raw file.
d <- read.csv(csv)

# show the data frame
d

# show it as a tibble (printed only, not assigned)
tibble::tibble(d)

# remember the column names parsed from the first header line, so the file
# does not have to be read a third time just to recover them
col_names <- names(d)

# re-read, skipping the first two lines, so the result has no header
d <- read.csv(csv, skip = 2, header = FALSE)
d

# restore the original column names
names(d) <- col_names
d

# overwrite the csv with the cleaned table for future reuse.
# NOTE: after this write the file has a single header line, so re-running
# this chunk without re-downloading would skip the first data row.
write.csv(d, csv, row.names = FALSE)
```
```yaml
editor_options:
  chunk_output_type: console
```
### Show table `DT::datatable()`
```{r}
# render the data frame as a DT table widget
DT::datatable(data = d)
```
## Wrangle data
### Manipulate with `dplyr`
```{r}
library(DT)
library(dplyr)

# Reduce the table to the date plus the total-fisheries-revenue columns,
# keeping only rows dated 1981-01-01 or later
d <- tibble(d) %>%
  # keep the first 10 characters of `time` and parse them as a Date
  # (assumes the text starts with YYYY-MM-DD -- TODO confirm)
  mutate(time = as.Date(substr(time, 1, 10))) %>%
  # drop rows before the cut-off date
  filter(time >= as.Date("1981-01-01")) %>%
  # keep `time` and every column whose name starts with the revenue prefix
  select(time, starts_with("total_fisheries_revenue"))

datatable(d)
```
### Tidy with `tidyr`
```{r}
library(tidyr)

# Reshape wide to long: every column except `time` is stacked into a
# (name, value) pair of columns
d <- pivot_longer(d, cols = -time, names_to = "name", values_to = "value")

datatable(d)
```
### Summarize with `dplyr`
```{r}
library(stringr)

# Derive a short region label by stripping the common prefix from `name`,
# then keep just the date, region and value columns
d <- d %>%
  mutate(region = str_remove(name, "total_fisheries_revenue_")) %>%
  select(time, region, value)

datatable(d)

# Average revenue per region.
# NOTE(review): mean() returns NA for any region that contains a missing
# value -- confirm whether na.rm = TRUE is wanted here.
d_sum <- d %>%
  group_by(region) %>%
  summarise(avg_revenue = mean(value))

# show the averages with currency formatting
datatable(d_sum) %>%
  formatCurrency("avg_revenue")
```
### Apply functions with `purrr` on a `nest`'ed `tibble`
```{r}
library(purrr)

# One row per region, with that region's (time, value) series packed into a
# `data` list-column. nest() keeps the non-nested column (region) as the key
# on its own, so no group_by() is needed and `n` is not left grouped.
n <- d %>%
  nest(data = c(time, value))
n

# Fit a linear model of value against time for each region and extract the
# slope on `time` as the trend
n <- n %>%
  mutate(
    # the argument is named `df` so it does not shadow the global `d`
    lm = map(data, function(df) {
      lm(value ~ time, data = df)
    }),
    # coef(m) gives the same estimate as coef(summary(m))[, "Estimate"] but
    # yields NA when the slope cannot be estimated, whereas the summary table
    # drops that row and indexing it by name would fail
    trend = map_dbl(lm, function(m) {
      coef(m)[["time"]]
    })
  )
n

# show the trend per region
n %>%
  select(region, trend) %>%
  datatable()
```