-
Notifications
You must be signed in to change notification settings - Fork 0
/
01_preprocessed data.R
209 lines (145 loc) · 6.39 KB
/
01_preprocessed data.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
rm(list = ls())
# Libs and funs ----
library(lubridate)
library(tidyverse)
library(tidyr)
`%notin%` <- Negate(`%in%`)
hours_blocks <- function(x) {
nhours <- length(rle(x)$values)
each_nhours <- rle(x)$lengths
rep(seq(1, nhours ,1), each_nhours)
} # requires already somehow grouped data; gives consecutive numbers for the distinguishable blocks
# Read meta data data ----
birdsID_df <- readxl::read_excel("C:/Users/User/Dropbox/VIP experiment/Files for VIP coordination.xlsx")
# get sex-treatment data
birdsID_dt_females <- birdsID_df %>%
mutate(sex = "female",
birdID = str_sub(female, 3, nchar(female))) %>%
rename(treatment = fem_trt) %>%
select(birdID, sex, treatment) %>%
distinct()
birdsID_dt_males <- birdsID_df %>%
mutate(sex = "male",
birdID = str_sub(male, 3, nchar(male))) %>%
rename(treatment = male_trt) %>%
select(birdID, sex, treatment) %>%
distinct()
birdsID_dt <- bind_rows(birdsID_dt_females, birdsID_dt_males)
# get nest data
nest_dt_females <- birdsID_df %>%
mutate(role = "female_parent",
birdID = str_sub(female, 3, nchar(female))) %>%
select(nest, year, hatch_date, p6, p14, wave, role, birdID, file_name, min_age) %>%
distinct()
nest_dt_males <- birdsID_df %>%
mutate(role = "male_parent",
birdID = str_sub(male, 3, nchar(male))) %>%
select(nest, year, hatch_date, p6, p14, wave, role, birdID, file_name, min_age) %>%
distinct()
nest_dt <- bind_rows(nest_dt_females, nest_dt_males)
# Read all RFID data ----
folders_list <- list.files("C:/Users/User/Dropbox/VIP experiment/rfid_data")
folder_files_df <- list()
for(j in 1: length(folders_list)) {
folder_path <- paste0("C:/Users/User/Dropbox/VIP experiment/rfid_data/", folders_list[j], sep = "")
files_in_folder <- list.files(path = folder_path)
# inner loop - start (add read each file from year-folder and add the file ID)
files_df <- list()
for(i in 1:length(files_in_folder)) {
file_path <- paste0(folder_path, "/", files_in_folder[i], sep = "")
files_df[[i]] <- readr::read_csv2(file_path, id = "file_id")
}
folder_files_df_temp <- plyr::ldply(files_df, data.frame)
# inner loop - end
folder_files_df[[j]] <- folder_files_df_temp
}
rfid_alldt <- plyr::ldply(folder_files_df, data.frame)
# Data checking/tuning ----
rfid_alldt_temp_df <- rfid_alldt %>%
mutate(date = mdy(Date.),
date_time = ymd_hms(paste0(date, " ", as.character(Time.), sep = "")),
year = as.integer(str_sub(file_id, 48,51)),
updated_date_time = update(date_time, year = year),
birdID = str_sub(Transponder.Code., 4, nchar(Transponder.Code.)),
file_name = str_sub(file_id, 53, nchar(file_id)-4),
rownms = row_number())
# in some files dates don't parse (dmy instead mdy as in most of the files)
badrecords1 <- rfid_alldt_temp_df %>% # bad records - list
filter(is.na(date)) %>%
group_by(rownms, Date.) %>%
summarise(n = n())
rfid_alldt_temp_df_bad1 <- rfid_alldt_temp_df %>% # bad records - fixing
filter(rownms %in% unique(badrecords1$rownms)) %>%
mutate(date = dmy(Date.),
date_time = ymd_hms(paste0(date, " ", as.character(Time.), sep = "")),
updated_date_time = update(date_time, year = year))
rfid_alldt_temp_df_good1 <- rfid_alldt_temp_df %>% # good records only
filter(rownms %notin% unique(badrecords1$rownms))
rfid_alldt_temp_df2 <- bind_rows(rfid_alldt_temp_df_good1, rfid_alldt_temp_df_bad1) # good and bad records
# still in some files dates don't parse but those are simply missing date - to remove
badrecords2 <- rfid_alldt_temp_df2 %>% # bad records - check
filter(is.na(date_time))
rfid_alldt_temp_df3 <- rfid_alldt_temp_df2 %>%
filter(!is.na(updated_date_time)) %>%
select(rownms, file_name, year, updated_date_time, birdID)
# add sex and treatment data
rfid_alldt_temp_df4 <- left_join(rfid_alldt_temp_df3, birdsID_dt, by = "birdID")
# fixing some artefacts
rfid_alldt_temp_df4 <- rfid_alldt_temp_df4 %>%
mutate(birdID = if_else(birdID == "3308", "7143308", birdID))
rfid_alldt_temp_df4 <- rfid_alldt_temp_df4 %>%
mutate(birdID = if_else(birdID == "1499", "7141499", birdID))
rfid_alldt_temp_df4 <- rfid_alldt_temp_df4 %>%
mutate(birdID = if_else(birdID == "1719", "7141719", birdID))
rfid_alldt_temp_df4 <- rfid_alldt_temp_df4 %>%
mutate(birdID = if_else(birdID == "1996", "7141996", birdID))
# add parent, nest, hatching date/wave info
rfid_alldt_temp_df5 <- left_join(rfid_alldt_temp_df4, nest_dt, by = c("year", "birdID", "file_name"))
# experiment/nest grouping
rfid_alldt_temp_df5 <- rfid_alldt_temp_df5 %>%
mutate(sx_treat = paste0(sex, "_", treatment, sep = ""))
rfid_alldt_temp_df5_temp <- rfid_alldt_temp_df5 %>%
filter(!is.na(role))
nest_groups <- rfid_alldt_temp_df5_temp %>%
select(file_name, sex, treatment) %>%
distinct() %>%
pivot_wider(values_from = treatment, names_from = sex) %>%
mutate(fm_treatment = paste0(female, "_", male, sep = "")) %>%
select(file_name, fm_treatment)
# summary table for exp groups
nest_groups %>%
group_by(fm_treatment) %>%
summarise(n = n())
rfid_alldt_temp_df6 <- left_join(rfid_alldt_temp_df5, nest_groups, by = "file_name")
rfid_alldt_temp_df6 <- rfid_alldt_temp_df6 %>%
mutate(fm_treatment_pooled = case_when(
fm_treatment == "control_NA" ~ "control_control",
fm_treatment == "androgen_NA" ~ "androgen_control",
fm_treatment == "NA_control" ~ "control_control",
fm_treatment == "NA_androgen" ~ "control_androgen",
fm_treatment == "NA_NA" ~ "control_control",
.default = as.character(fm_treatment)
))
# add hours/rec
rfid_alldt_temp_df6 <- rfid_alldt_temp_df6 %>%
mutate(hr = hour(updated_date_time)) %>%
group_by(file_name) %>%
mutate(hr_rec = hours_blocks(hr))
# tuning up
rfid_alldt_temp_df6 <- rfid_alldt_temp_df6 %>%
rename(date_time = updated_date_time,
ind_treatment = treatment) %>%
select(-sx_treat)
# fix chicks age
rfid_alldt_temp_df6x <- rfid_alldt_temp_df6 %>%
mutate(p6 = if_else(is.na(p6) & !is.na(p14), p14, p6)) %>%
group_by(year, file_name) %>%
fill(p6, .direction = "downup")
# filtering the most external rec hours
# (for every per hour calculations it may be biased as being not a full hour of recording)
rfid_alldt_temp_df7 <- rfid_alldt_temp_df6 %>%
group_by(file_name) %>%
filter(hr_rec != 1,
hr_rec != max(hr_rec)) %>%
ungroup()
saveRDS(rfid_alldt_temp_df7, "01_preprocessed_data.rds")