-
Notifications
You must be signed in to change notification settings - Fork 0
/
Question4Analysis.R
74 lines (51 loc) · 2.23 KB
/
Question4Analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
library("dplyr")
library("tidyr")
library("ggplot2")
# ---- Load raw grade data ----
# Both files are read relative to the working directory; character columns
# are kept as plain strings rather than being converted to factors.
uw <- read.csv(
  file = "data/uw_grades.csv",
  stringsAsFactors = FALSE
)
virginia_tech <- read.csv(
  file = "data/vt_grades.csv",
  stringsAsFactors = FALSE
)
# ---- University of Washington: average GPA by class size ----
# Keep only the Autumn 2015 term and the two columns the analysis needs,
# then label every row with a class-size bucket.
#
# The "25-50" bucket is inclusive on both ends so that rows with exactly 25
# or exactly 50 students are counted; strict comparisons on both sides of
# each boundary would silently drop them from every bucket.
uw <- uw %>%
  filter(Term == "20154 (Autumn 2015)") %>%
  select(Student_Count, Average_GPA) %>%
  mutate(
    Size = case_when(
      Student_Count < 25 ~ "Less than 25",
      Student_Count <= 50 ~ "25-50",
      Student_Count > 50 ~ "Greater than 50"
    )
  ) %>%
  # A missing Student_Count matches no bucket (Size is NA); drop those rows.
  filter(!is.na(Size))

# One row per bucket: the unweighted mean of the per-row Average_GPA values.
# na.rm = TRUE keeps a single missing GPA from turning a whole bucket into NA.
uw_summary <- uw %>%
  group_by(Size) %>%
  summarize(Average_GPA = mean(Average_GPA, na.rm = TRUE)) %>%
  mutate(School = "University of Washington")
# ---- Virginia Tech: average GPA by class size ----
# Rows with 2000 or more students are excluded up front.
# NOTE(review): presumably these are aggregate or outlier rows -- confirm
# against the data source.
#
# The two columns of interest are selected and renamed in one step to
# Student_Count / Average_GPA, then every row is labelled with a class-size
# bucket. The "25-50" bucket is inclusive on both ends so that rows with
# exactly 25 or exactly 50 students are counted; strict comparisons on both
# sides of each boundary would silently drop them from every bucket.
virginia_tech <- virginia_tech %>%
  filter(Number_of_students < 2000) %>%
  select(Student_Count = Number_of_students, Average_GPA = GPA) %>%
  mutate(
    Size = case_when(
      Student_Count < 25 ~ "Less than 25",
      Student_Count <= 50 ~ "25-50",
      Student_Count > 50 ~ "Greater than 50"
    )
  ) %>%
  # A missing Student_Count matches no bucket (Size is NA); drop those rows.
  filter(!is.na(Size))

# One row per bucket: the unweighted mean of the per-row Average_GPA values.
# na.rm = TRUE keeps a single missing GPA from turning a whole bucket into NA.
vt_summary <- virginia_tech %>%
  group_by(Size) %>%
  summarize(Average_GPA = mean(Average_GPA, na.rm = TRUE)) %>%
  mutate(School = "Virginia Tech")
# ---- Combine both schools and plot ----
# Stack the two per-school summaries (columns: Size, Average_GPA, School).
combined_data <- bind_rows(uw_summary, vt_summary)

# Display order of the class-size buckets on the x axis (smallest first);
# without this the discrete axis would sort the labels alphabetically.
positions <- c("Less than 25", "25-50", "Greater than 50")

# Fill colours are named by school so the mapping does not depend on the
# alphabetical order of the School values.
school_colours <- c(
  "University of Washington" = "#8856a7",
  "Virginia Tech" = "#99000d"
)

# Grouped bar chart: one bar per school within each class-size bucket.
gpa_plot <- ggplot(
  data = combined_data,
  aes(x = Size, y = Average_GPA, fill = School)
) +
  geom_bar(stat = "identity", position = position_dodge()) +
  scale_x_discrete(limits = positions) +
  scale_fill_manual(values = school_colours) +
  labs(title = "GPA Based on Class Size", y = "Average GPA") +
  theme(plot.title = element_text(hjust = 0.5))

# ggsave() must be called as its own statement with the plot passed
# explicitly; adding it to the plot with `+` fails because its return value
# is not a ggplot component.
ggsave("q4.png", plot = gpa_plot, width = 6, height = 4, dpi = 300)

# Still render the plot when the script is run at top level.
gpa_plot