-
Notifications
You must be signed in to change notification settings - Fork 3
/
paper.bib
290 lines (265 loc) · 13.1 KB
/
paper.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
@inproceedings{10.1007/978-3-030-84924-5_6,
  author    = {Erdelt, Patrick K.},
  editor    = {Nambiar, Raghunath and Poess, Meikel},
  title     = {A Framework for Supporting Repetition and Evaluation in the Process of Cloud-Based {DBMS} Performance Benchmarking},
  booktitle = {Performance Evaluation and Benchmarking},
  year      = {2021},
  publisher = {Springer International Publishing},
  address   = {Cham},
  pages     = {75--92},
  abstract  = {Performance benchmarking of Database Management Systems (DBMS) is an important yet complicated process. We motivate and present two supporting Python packages which help to avoid common pitfalls and in particular improve reproducibility and transparency in heterogeneous systems with hardware accelerators. The first addresses operational aspects by providing dynamic testbeds using Docker images, especially for cloud-based systems. The second helps planning and recurrently running experiments in a predefined setup via JDBC/SQL, and analyzing results with automated reports and an interactive dashboard. The purpose of this is to thoroughly evaluate aspects of performances of DBMS based on real-life measurements, runtime and hardware metrics, depending on various parameters including the hardware, and with high repeatability. We present a series of TPC-H inspired example benchmarks in a Kubernetes cluster for demonstration, and some lessons learned.},
  isbn      = {978-3-030-84924-5},
  url       = {https://link.springer.com/chapter/10.1007/978-3-030-84924-5_6},
  doi       = {10.1007/978-3-030-84924-5_6}
}
@inproceedings{10.1007/978-3-030-94437-7_6,
  author    = {Erdelt, Patrick K.},
  editor    = {Nambiar, Raghunath and Poess, Meikel},
  title     = {Orchestrating {DBMS} Benchmarking in the Cloud with {Kubernetes}},
  booktitle = {Performance Evaluation and Benchmarking},
  year      = {2022},
  publisher = {Springer International Publishing},
  address   = {Cham},
  pages     = {81--97},
  abstract  = {Containerization has become a common practise in software provisioning. Kubernetes (K8s) is useful in deploying containers in clusters, in managing their lifecycle, in scheduling and resource allocation. The benchmarking process requires the interaction of various components. We propose a way to organize benchmarking in the Cloud by looking at typical components in the process and ask if they could be managed by K8s as containerized Microservices. We aim at scalability for the process, parallelized execution and minimized traffic I/O from and into the Cloud. This supports planning a series of experiments to investigate a high-dimensional parameter space and avoiding complex installations. This also provides a way for Cross-Cloud comparison.},
  isbn      = {978-3-030-94437-7},
  url       = {https://link.springer.com/chapter/10.1007/978-3-030-94437-7_6},
  doi       = {10.1007/978-3-030-94437-7_6}
}
@inproceedings{10.1007/978-3-319-67162-8_12,
  author    = {Seybold, Daniel and Domaschka, J{\"o}rg},
  title     = {Is Distributed Database Evaluation {Cloud-Ready}?},
  booktitle = {New Trends in Databases and Information Systems},
  year      = {2017},
  publisher = {Springer International Publishing},
  address   = {Cham},
  pages     = {100--108},
  abstract  = {The database landscape has significantly evolved over the last decade as cloud computing enables to run distributed databases on virtually unlimited cloud resources. Hence, the already non-trivial task of selecting and deploying a distributed database system becomes more challenging. Database evaluation frameworks aim at easing this task by guiding the database selection and deployment decision. The evaluation of databases has evolved as well by moving the evaluation focus from performance to distribution aspects such as scalability and elasticity. This paper presents a cloud-centric analysis of distributed database evaluation frameworks based on evaluation tiers and framework requirements. It analysis eight well adopted evaluation frameworks. The results point out that the evaluation tiers performance, scalability, elasticity and consistency are well supported, in contrast to resource selection and availability. Further, the analysed frameworks do not support cloud-centric requirements but support classic evaluation requirements.},
  isbn      = {978-3-319-67162-8},
  url       = {https://link.springer.com/chapter/10.1007/978-3-319-67162-8_12},
  doi       = {10.1007/978-3-319-67162-8_12}
}
@inproceedings{10.1007/978-3-030-12079-5_4,
  author    = {Brent, Lexi and Fekete, Alan},
  editor    = {Chang, Lijun and Gan, Junhao and Cao, Xin},
  title     = {A Versatile Framework for Painless Benchmarking of Database Management Systems},
  booktitle = {Databases Theory and Applications},
  year      = {2019},
  publisher = {Springer International Publishing},
  address   = {Cham},
  pages     = {45--56},
  abstract  = {Benchmarking is a crucial aspect of evaluating database management systems. Researchers, developers, and users utilise industry-standard benchmarks to assist with their research, development, or purchase decisions, respectively. Despite this ubiquity, benchmarking is usually a difficult process involving laborious tasks such as writing and debugging custom testbed scripts, or extracting and transforming output into useful formats. To date, there are only a limited number of comprehensive benchmarking frameworks designed to tackle these usability and efficiency challenges directly.},
  isbn      = {978-3-030-12079-5},
  url       = {https://link.springer.com/chapter/10.1007/978-3-030-12079-5_4},
  doi       = {10.1007/978-3-030-12079-5_4}
}
@inproceedings{Raasveldt2018FBC32099503209955,
  author    = {Raasveldt, Mark and Holanda, Pedro and Gubner, Tim and M{\"u}hleisen, Hannes},
  title     = {Fair Benchmarking Considered Difficult: Common Pitfalls In Database Performance Testing},
  booktitle = {Proceedings of the Workshop on Testing Database Systems},
  series    = {DBTest'18},
  year      = {2018},
  isbn      = {978-1-4503-5826-2},
  location  = {Houston, TX, USA},
  pages     = {2:1--2:6},
  articleno = {2},
  numpages  = {6},
  url       = {http://doi.acm.org/10.1145/3209950.3209955},
  acmid     = {3209955},
  publisher = {ACM},
  address   = {New York, NY, USA},
  keywords  = {Benchmarking, Performance Evaluation},
  doi       = {10.1145/3209950.3209955}
}
@inproceedings{DBLPconfsigmodKerstenKZ18,
  author    = {Kersten, Martin L. and Koutsourakis, Panagiotis and Zhang, Ying},
  editor    = {B{\"{o}}hm, Alexander and Rabl, Tilmann},
  title     = {Finding the Pitfalls in Query Performance},
  booktitle = {Proceedings of the 7th {I}nternational {W}orkshop on {T}esting {D}atabase {S}ystems, DBTest@SIGMOD 2018},
  pages     = {3:1--3:6},
  publisher = {{ACM}},
  year      = {2018},
  url       = {https://doi.org/10.1145/3209950.3209951},
  timestamp = {Mon, 12 Aug 2019 13:49:51 +0200},
  biburl    = {https://dblp.org/rec/conf/sigmod/KerstenKZ18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  doi       = {10.1145/3209950.3209951},
}
@software{reback2020pandas,
  author    = {{The pandas development team}},
  title     = {pandas-dev/pandas: Pandas},
  month     = feb,
  year      = {2020},
  publisher = {Zenodo},
  version   = {latest},
  doi       = {10.5281/zenodo.3509134},
  url       = {https://doi.org/10.5281/zenodo.3509134}
}
@inproceedings{mckinney-proc-scipy-2010,
  author    = {McKinney, Wes},
  title     = {Data Structures for Statistical Computing in {Python}},
  booktitle = {Proceedings of the 9th {Python in Science Conference}},
  pages     = {56--61},
  year      = {2010},
  editor    = {van der Walt, St{\'e}fan and Millman, Jarrod},
  doi       = {10.25080/Majora-92bf1922-00a}
}
@misc{208870,
  author    = {Rabenstein, Bjorn and Volz, Julius},
  title     = {Prometheus: A Next-Generation Monitoring System (Talk)},
  year      = {2015},
  address   = {Dublin},
  publisher = {{USENIX} Association},
  month     = may,
}
@inproceedings{Kluyver2016jupyter,
  title        = {{Jupyter} Notebooks -- a publishing format for reproducible computational workflows},
  author       = {Kluyver, Thomas and Ragan-Kelley, Benjamin and P{\'e}rez, Fernando and Granger, Brian and Bussonnier, Matthias and Frederic, Jonathan and Kelley, Kyle and Hamrick, Jessica and Grout, Jason and Corlay, Sylvain and Ivanov, Paul and Avila, Dami{\'a}n and Abdalla, Safia and Willing, Carol},
  booktitle    = {Positioning and Power in Academic Publishing: Players, Agents and Agendas},
  editor       = {Loizides, F. and Schmidt, B.},
  organization = {IOS Press},
  pages        = {87--90},
  year         = {2016},
  doi          = {10.3233/978-1-61499-649-1-87}
}
@article{Hunter:2007,
  author    = {Hunter, J. D.},
  title     = {{Matplotlib}: A {2D} graphics environment},
  journal   = {Computing in Science \& Engineering},
  volume    = {9},
  number    = {3},
  pages     = {90--95},
  abstract  = {Matplotlib is a 2D graphics package used for Python for application development, interactive scripting, and publication-quality image generation across user interfaces and operating systems.},
  publisher = {IEEE COMPUTER SOC},
  doi       = {10.1109/MCSE.2007.55},
  year      = {2007}
}
@article{2020SciPy-NMeth,
  author  = {Virtanen, Pauli and Gommers, Ralf and Oliphant, Travis E. and
             Haberland, Matt and Reddy, Tyler and Cournapeau, David and
             Burovski, Evgeni and Peterson, Pearu and Weckesser, Warren and
             Bright, Jonathan and {van der Walt}, St{\'e}fan J. and
             Brett, Matthew and Wilson, Joshua and Millman, K. Jarrod and
             Mayorov, Nikolay and Nelson, Andrew R. J. and Jones, Eric and
             Kern, Robert and Larson, Eric and Carey, C J and
             Polat, {\.I}lhan and Feng, Yu and Moore, Eric W. and
             {VanderPlas}, Jake and Laxalde, Denis and Perktold, Josef and
             Cimrman, Robert and Henriksen, Ian and Quintero, E. A. and
             Harris, Charles R. and Archibald, Anne M. and
             Ribeiro, Ant{\^o}nio H. and Pedregosa, Fabian and
             {van Mulbregt}, Paul and {SciPy 1.0 Contributors}},
  title   = {{SciPy} 1.0: Fundamental Algorithms for Scientific Computing in {Python}},
  journal = {Nature Methods},
  year    = {2020},
  volume  = {17},
  pages   = {261--272},
  adsurl  = {https://rdcu.be/b08Wh},
  doi     = {10.1038/s41592-019-0686-2},
}
@book{KounevLK20,
  author    = {Kounev, Samuel and Lange, Klaus{-}Dieter and von Kistowski, J{\'{o}}akim},
  title     = {Systems Benchmarking - For Scientists and Engineers},
  publisher = {Springer},
  year      = {2020},
  url       = {https://doi.org/10.1007/978-3-030-41705-5},
  doi       = {10.1007/978-3-030-41705-5},
  isbn      = {978-3-030-41704-8},
  timestamp = {Tue, 08 Sep 2020 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/books/sp/KounevLK20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@book{series/utcs/IgualS17,
  author    = {Igual, Laura and Segu{\'i}, Santi},
  title     = {Introduction to Data Science - A {P}ython Approach to Concepts, Techniques and Applications},
  series    = {Undergraduate Topics in Computer Science},
  pages     = {1--215},
  publisher = {Springer},
  year      = {2017},
  isbn      = {978-3-319-50017-1},
  doi       = {10.1007/978-3-319-50017-1},
  ee        = {https://doi.org/10.1007/978-3-319-50017-1},
  added-at  = {2018-11-02T00:00:00.000+0100},
  biburl    = {https://www.bibsonomy.org/bibtex/2df957256a471cf3626c5a280424a2b9a/dblp},
  interhash = {1f6d348e064db5ee3a118549b35cbb04},
  intrahash = {df957256a471cf3626c5a280424a2b9a},
  keywords  = {dblp},
  timestamp = {2018-11-03T12:45:44.000+0100}
}
@article{Waskom2021,
  author    = {Waskom, Michael L.},
  title     = {seaborn: statistical data visualization},
  journal   = {Journal of Open Source Software},
  year      = {2021},
  volume    = {6},
  number    = {60},
  pages     = {3021},
  publisher = {The Open Journal},
  doi       = {10.21105/joss.03021},
  url       = {https://doi.org/10.21105/joss.03021}
}
@misc{TIOBE,
  author = {{TIOBE}},
  title  = {{TIOBE Index - TIOBE}},
  year   = {2022},
  month  = jun,
  note   = {[Online; accessed 31. Jul. 2022]},
  url    = {https://www.tiobe.com/tiobe-index}
}
@misc{PYPL,
  author = {{PYPL}},
  title  = {{PYPL PopularitY of Programming Language index}},
  year   = {2022},
  month  = jul,
  note   = {[Online; accessed 31. Jul. 2022]},
  url    = {https://pypl.github.io/PYPL.html}
}
@inproceedings{10114533389063338912,
  author    = {He, Sen and Manns, Glenna and Saunders, John and Wang, Wei and Pollock, Lori and Soffa, Mary Lou},
  title     = {A Statistics-Based Performance Testing Methodology for Cloud Applications},
  year      = {2019},
  isbn      = {9781450355728},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  url       = {https://doi.org/10.1145/3338906.3338912},
  booktitle = {Proceedings of the 2019 27th ACM Joint Meeting on European Software Engineering Conference and Symposium on the Foundations of Software Engineering},
  pages     = {188--199},
  numpages  = {12},
  keywords  = {cloud computing, resource contention, performance testing, non-parametric statistics},
  location  = {Tallinn, Estonia},
  series    = {ESEC/FSE 2019},
  doi       = {10.1145/3338906.3338912},
}
@misc{DBDBIO,
  author = {{Carnegie Mellon Database Group}},
  title  = {{Database of Databases}},
  year   = {2022},
  month  = aug,
  note   = {[Online; accessed 1. Aug. 2022]},
  url    = {https://dbdb.io}
}
@misc{DBEngines,
  author = {{solid IT GmbH}},
  title  = {{DB-Engines Ranking}},
  year   = {2022},
  month  = aug,
  note   = {[Online; accessed 1. Aug. 2022]},
  url    = {https://db-engines.com/en/ranking}
}