forked from se-sic/coronet
-
Notifications
You must be signed in to change notification settings - Fork 0
/
util-plot-evaluation.R
131 lines (110 loc) · 5.96 KB
/
util-plot-evaluation.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
## This file is part of coronet, which is free software: you
## can redistribute it and/or modify it under the terms of the GNU General
## Public License as published by the Free Software Foundation, version 2.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License along
## with this program; if not, write to the Free Software Foundation, Inc.,
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
##
## Copyright 2019 by Klara Schlüter <[email protected]>
## Copyright 2021 by Thomas Bock <[email protected]>
## All Rights Reserved.
## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / /
## Libraries ---------------------------------------------------------------
requireNamespace("ggplot2") ## plotting
requireNamespace("reshape2") ## melting data
## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / /
## Plots regarding commit edit and editor types ---------------------------------------
#' Produces a stacked barplot showing for every editor the number of commits for which the editor is only author,
#' only committer, or both author and committer.
#'
#' The counts are taken from the results of \code{get.committer.and.author.commit.count} (columns
#' \code{author.name} and \code{freq}) and \code{get.committer.not.author.commit.count} (columns
#' \code{author.name}, \code{committer.name}, and \code{freq}). The bars are sorted in ascending order by the
#' "only committer" value; ties are broken by the "author and committer" value and then by the "only author" value.
#'
#' @param data the project data
#' @param percentage.per.author if \code{TRUE}, the barplot shows the relative number of differently edited commits per
#'                              author: each bar in the barplot (representing the commits of one editor) is scaled to
#'                              a total of 1 (i.e., 100%). Otherwise, the absolute number of commits per author is
#'                              shown in the plot. [default: FALSE]
#'
#' @return a ggplot2 plot object
plot.commit.editor.types.by.author = function(data, percentage.per.author = FALSE) {

    ## get editor data
    and = get.committer.and.author.commit.count(data)
    or = get.committer.not.author.commit.count(data)

    ## Sums up the given commit counts per editor name and returns a data frame with the columns 'editor' and
    ## 'count.column'. 'aggregate' stops with an error ("no rows to aggregate") on empty input, which happens if
    ## there is no commit whose author and committer differ; an empty data frame is returned in that case.
    sum.commits.per.editor = function(editor.names, commit.counts, count.column) {
        if (length(commit.counts) == 0) {
            sums = data.frame(character(0), numeric(0), stringsAsFactors = FALSE)
        } else {
            sums = aggregate(commit.counts, by = list(editor.names), FUN = sum)
        }
        colnames(sums) = c("editor", count.column)
        return(sums)
    }

    ## build data frame as required for plotting: one row per editor, one column per editor type
    both = data.frame(and[["author.name"]], and[["freq"]])
    colnames(both) = c("editor", "author.and.committer")

    author = sum.commits.per.editor(or[["author.name"]], or[["freq"]], "only.author")
    committer = sum.commits.per.editor(or[["committer.name"]], or[["freq"]], "only.committer")

    ## the full outer joins (on the shared column 'editor') keep editors that occur in only one of the data frames
    plot.data = merge(merge(both, author, all = TRUE), committer, all = TRUE)
    ## editors missing in one of the merged data frames have no commits of the respective type
    plot.data[is.na(plot.data)] = 0

    ## if desired, calculate percentage of editor types per author
    if (percentage.per.author) {
        value.columns = c("author.and.committer", "only.author", "only.committer")
        ## scale data values per author (represented by one row) such that they sum up to 1;
        ## dividing a data frame by a vector of length 'nrow' divides every column element-wise by that vector
        plot.data[value.columns] = plot.data[value.columns] / rowSums(plot.data[value.columns])
    }

    ## compute order of bars from data: ascending by 'only.committer', then 'author.and.committer', then 'only.author'
    bar.order = order(plot.data[["only.committer"]], plot.data[["author.and.committer"]], plot.data[["only.author"]])
    ordered.editors = plot.data[["editor"]][bar.order]

    ## prepare data for a stacked barplot (prepare for stacking the editor types);
    ## the id column is stated explicitly instead of letting 'melt' guess it
    plot.data = reshape2::melt(plot.data, id.vars = "editor")
    names(plot.data) = c("editor", "editor.type", "commit.count")

    ## the y axis shows fractions instead of counts if the bars are scaled per author
    y.label = if (percentage.per.author) "Relative commit count" else "Commit count"

    ## draw plot
    plot = ggplot2::ggplot(data = plot.data, mapping = ggplot2::aes(x = factor(editor, levels = ordered.editors),
                                                                    y = `commit.count`, fill = `editor.type`)) +
        ## use data frame values instead of counting entries
        ggplot2::geom_bar(stat = "identity") +
        ## rotate x-axis labels (the editor names) by 90 degree
        ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90, hjust = 1)) +
        ## set proper legend items and title; the labels are named to not depend on the order of the editor types
        ggplot2::scale_fill_discrete(name = "Commit edit type",
                                     labels = c("author.and.committer" = "author and committer",
                                                "only.author" = "only author",
                                                "only.committer" = "only committer")) +
        ## add proper axis labels
        ggplot2::labs(
            x = "Authors",
            y = y.label
        )

    return(plot)
}
#' Produces a barplot showing for how many commits committer and author are the same person and for how many commits
#' committer and author are different.
#'
#' The two bar heights are the sums of the \code{freq} columns of the results of
#' \code{get.committer.not.author.commit.count} and \code{get.committer.and.author.commit.count}, respectively.
#'
#' @param data the project data
#' @param relative.y.scale if \code{TRUE}, the y axis shows the number of commits of the respective edit type relative
#'                         to the number of all commits (i.e., as a fraction between 0 and 1). If \code{FALSE}, the
#'                         y axis shows the absolute number of commits. [default: FALSE]
#'
#' @return a ggplot2 plot object
plot.commit.edit.types.in.project = function(data, relative.y.scale = FALSE) {

    ## get commit count
    and = get.committer.and.author.commit.count(data)
    or = get.committer.not.author.commit.count(data)

    ## build data frame as required for plotting: one row per edit type
    plot.data = data.frame(
        edit.types = c("author.!=.committer", "author.=.committer"),
        commit.count = c(sum(or[["freq"]]), sum(and[["freq"]]))
    )

    ## if desired, calculate values for the y axis showing the fraction of all commits
    if (relative.y.scale) {
        total.commit.count = sum(plot.data[["commit.count"]])
        ## without any commits, the division would yield NaN (0 / 0); keep the zero counts in that case
        if (is.na(total.commit.count) || total.commit.count != 0) {
            plot.data[["commit.count"]] = plot.data[["commit.count"]] / total.commit.count
        }
    }

    ## the y axis shows fractions instead of counts if the relative scale is requested
    y.label = if (relative.y.scale) "Relative commit count" else "Commit count"

    ## draw plot
    plot = ggplot2::ggplot(data = plot.data, mapping = ggplot2::aes(y = `commit.count`, x = `edit.types`)) +
        ## use data frame values instead of counting entries
        ggplot2::geom_bar(stat = "identity") +
        ## set proper bar labels
        ggplot2::scale_x_discrete(labels = c("author.!=.committer" = "author != committer",
                                             "author.=.committer" = "author = committer")) +
        ## add proper axis labels
        ggplot2::labs(
            x = "Edit types",
            y = y.label
        )

    return(plot)
}