-
Notifications
You must be signed in to change notification settings - Fork 12
/
92-AppendixC-RECS-CB.Rmd
114 lines (87 loc) · 2.66 KB
/
92-AppendixC-RECS-CB.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# RECS derived variable codebook {#recs-cb}
```{r}
#| label: recs-cb-cb-setup
#| echo: FALSE
#| error: FALSE
#| warning: FALSE
#| message: FALSE
library(tidyverse)
library(janitor)
library(kableExtra)
library(knitr)
data(recs_2020)
```
The full codebook with the original variables is available at [https://www.eia.gov/consumption/residential/data/2020/index.php?view=microdata](https://www.eia.gov/consumption/residential/data/2020/index.php?view=microdata) - "Variable and Response Codebook."
This is a codebook for the RECS data used in this book (`recs_2020`) from the {srvyrexploR} package.
```{r}
#| label: recs-cb-prep
#| echo: FALSE
# Collect the attributes attached to each column of recs_2020; the codebook
# metadata (Section, Question, label) is read from these attributes.
attrlist <- map(recs_2020, attributes)

# One row per variable: name, section, question text (NA when the column has
# no "Question" attribute), description, and the column's class vector.
recs_var_info <- tibble(
  Vars = names(attrlist),
  Section = unname(map_chr(attrlist, "Section")),
  Question = unlist(
    map(map(attrlist, "Question"), NULL_to_NA),
    use.names = FALSE
  ),
  Description = unname(map_chr(attrlist, "label")),
  VarType = map(recs_2020, class)
) %>%
  # DOEID gets the pseudo-type "ID", which none of the summary branches in
  # make_section() match, so no table is produced for it.
  mutate(VarType = if_else(Vars == "DOEID", list("ID"), VarType))
# Print an unweighted frequency table for one variable.
#
# dat: data frame containing the variable.
# var: name of the variable, as a string.
#
# The table has one row per distinct value plus a "Total" row. The
# `Unweighted Freq` column holds each count divided by the overall count
# (a proportion), rounded to 3 decimals. The table is printed as a kable;
# for HTML output the kableExtra "minimal" theme is applied first.
cb_count <- function(dat, var) {
  freq_tbl <- dat %>%
    count(.data[[var]]) %>%
    mutate(`Unweighted Freq` = n / sum(n)) %>%
    janitor::adorn_totals(
      where = "row", fill = "-", na.rm = TRUE, name = "Total",
      n, `Unweighted Freq`
    ) %>%
    # Round only after the totals row is added so the total is computed from
    # unrounded proportions.
    mutate(`Unweighted Freq` = round(`Unweighted Freq`, 3)) %>%
    kbl(position = "H")

  # is_html_output() is exported by knitr, so `::` (not `:::`) is the right
  # accessor.
  if (knitr::is_html_output()) {
    freq_tbl <- freq_tbl %>% kable_minimal()
  }

  print(freq_tbl)
}
# Print a one-row summary table for one variable.
#
# dat: data frame containing the variable.
# var: name of the variable, as a string.
#
# The table reports the number of missing values and the minimum, median and
# maximum of the non-missing values. It is printed as a kable; for HTML output
# the kableExtra "minimal" theme is applied first.
cb_continuous <- function(dat, var) {
  summary_tbl <- dat %>%
    summarize(
      `N Missing` = sum(is.na(.data[[var]])),
      Minimum = min(.data[[var]], na.rm = TRUE),
      Median = median(.data[[var]], na.rm = TRUE),
      Maximum = max(.data[[var]], na.rm = TRUE)
    ) %>%
    kbl(position = "H")

  # is_html_output() is exported by knitr, so `::` (not `:::`) is the right
  # accessor.
  if (knitr::is_html_output()) {
    summary_tbl <- summary_tbl %>% kable_minimal()
  }

  print(summary_tbl)
}
# Write the codebook markdown for one section: a level-2 heading followed by
# an entry for every variable that recs_var_info assigns to that section.
#
# sec: section name, matched against recs_var_info$Section.
#
# Everything is emitted with cat()/print(), so the calling chunk must use
# `results: asis` for the output to be rendered as markdown.
make_section <- function(sec) {
  cat(str_c("## ", sec, "\n\n"))

  # Write the entry for a single variable: unnumbered heading, description,
  # question text (when present) and a summary table chosen by variable class.
  make_sum <- function(var) {
    cat(str_c("#### ", var, " {-} \n\n"))

    var_meta <- recs_var_info %>% filter(Vars == var)

    cat(str_c("Description: ", pull(var_meta, Description), "\n\n"))

    question <- pull(var_meta, Question)
    if (!is.na(question)) {
      cat(str_c("Question text: ", question, "\n\n"))
    }

    var_classes <- var_meta %>%
      pull(VarType) %>%
      unlist()

    if (any(c("factor", "character", "logical") %in% var_classes)) {
      recs_2020 %>% cb_count(var)
      cat("\n")
    } else if (any(c("numeric", "integer") %in% var_classes)) {
      # class() reports integer vectors as "integer", not "numeric"; accept
      # both so integer-stored variables also get a summary table.
      recs_2020 %>% cb_continuous(var)
      cat("\n")
    }
  }

  section_vars <- recs_var_info %>%
    filter(Section == sec) %>%
    pull(Vars)

  walk(section_vars, make_sum)
}
```
```{r}
#| label: recs-cb-write
#| echo: FALSE
#| results: asis
# Emit one codebook section per distinct Section value, in order of first
# appearance in recs_var_info.
section_names <- unique(recs_var_info$Section)
walk(section_names, make_section)
```