-
Notifications
You must be signed in to change notification settings - Fork 3
/
paper.bib
290 lines (265 loc) · 13.1 KB
/
paper.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
@inproceedings{10.1007/978-3-030-84924-5_6,
  author    = {Erdelt, Patrick K.},
  editor    = {Nambiar, Raghunath and Poess, Meikel},
  title     = {A Framework for Supporting Repetition and Evaluation in the Process of Cloud-Based {DBMS} Performance Benchmarking},
  booktitle = {Performance Evaluation and Benchmarking},
  year      = {2021},
  publisher = {Springer International Publishing},
  address   = {Cham},
  pages     = {75--92},
  abstract  = {Performance benchmarking of Database Management Systems (DBMS) is an important yet complicated process. We motivate and present two supporting Python packages which help to avoid common pitfalls and in particular improve reproducibility and transparency in heterogeneous systems with hardware accelerators. The first addresses operational aspects by providing dynamic testbeds using Docker images, especially for cloud-based systems. The second helps planning and recurrently running experiments in a predefined setup via JDBC/SQL, and analyzing results with automated reports and an interactive dashboard. The purpose of this is to thoroughly evaluate aspects of performances of DBMS based on real-life measurements, runtime and hardware metrics, depending on various parameters including the hardware, and with high repeatability. We present a series of TPC-H inspired example benchmarks in a Kubernetes cluster for demonstration, and some lessons learned.},
  isbn      = {978-3-030-84924-5},
  url       = {https://link.springer.com/chapter/10.1007/978-3-030-84924-5_6},
  doi       = {10.1007/978-3-030-84924-5_6}
}
@inproceedings{10.1007/978-3-030-94437-7_6,
  author    = {Erdelt, Patrick K.},
  editor    = {Nambiar, Raghunath and Poess, Meikel},
  title     = {Orchestrating {DBMS} Benchmarking in the Cloud with {Kubernetes}},
  booktitle = {Performance Evaluation and Benchmarking},
  year      = {2022},
  publisher = {Springer International Publishing},
  address   = {Cham},
  pages     = {81--97},
  abstract  = {Containerization has become a common practise in software provisioning. Kubernetes (K8s) is useful in deploying containers in clusters, in managing their lifecycle, in scheduling and resource allocation. The benchmarking process requires the interaction of various components. We propose a way to organize benchmarking in the Cloud by looking at typical components in the process and ask if they could be managed by K8s as containerized Microservices. We aim at scalability for the process, parallelized execution and minimized traffic I/O from and into the Cloud. This supports planning a series of experiments to investigate a high-dimensional parameter space and avoiding complex installations. This also provides a way for Cross-Cloud comparison.},
  isbn      = {978-3-030-94437-7},
  url       = {https://link.springer.com/chapter/10.1007/978-3-030-94437-7_6},
  doi       = {10.1007/978-3-030-94437-7_6}
}
@inproceedings{10.1007/978-3-319-67162-8_12,
  author    = {Seybold, Daniel and Domaschka, J{\"o}rg},
  title     = {Is Distributed Database Evaluation {Cloud-Ready}?},
  booktitle = {New Trends in Databases and Information Systems},
  year      = {2017},
  publisher = {Springer International Publishing},
  address   = {Cham},
  pages     = {100--108},
  abstract  = {The database landscape has significantly evolved over the last decade as cloud computing enables to run distributed databases on virtually unlimited cloud resources. Hence, the already non-trivial task of selecting and deploying a distributed database system becomes more challenging. Database evaluation frameworks aim at easing this task by guiding the database selection and deployment decision. The evaluation of databases has evolved as well by moving the evaluation focus from performance to distribution aspects such as scalability and elasticity. This paper presents a cloud-centric analysis of distributed database evaluation frameworks based on evaluation tiers and framework requirements. It analysis eight well adopted evaluation frameworks. The results point out that the evaluation tiers performance, scalability, elasticity and consistency are well supported, in contrast to resource selection and availability. Further, the analysed frameworks do not support cloud-centric requirements but support classic evaluation requirements.},
  isbn      = {978-3-319-67162-8},
  url       = {https://link.springer.com/chapter/10.1007/978-3-319-67162-8_12},
  doi       = {10.1007/978-3-319-67162-8_12}
}
@inproceedings{10.1007/978-3-030-12079-5_4,
  author    = {Brent, Lexi and Fekete, Alan},
  editor    = {Chang, Lijun and Gan, Junhao and Cao, Xin},
  title     = {A Versatile Framework for Painless Benchmarking of Database Management Systems},
  booktitle = {Databases Theory and Applications},
  year      = {2019},
  publisher = {Springer International Publishing},
  address   = {Cham},
  pages     = {45--56},
  abstract  = {Benchmarking is a crucial aspect of evaluating database management systems. Researchers, developers, and users utilise industry-standard benchmarks to assist with their research, development, or purchase decisions, respectively. Despite this ubiquity, benchmarking is usually a difficult process involving laborious tasks such as writing and debugging custom testbed scripts, or extracting and transforming output into useful formats. To date, there are only a limited number of comprehensive benchmarking frameworks designed to tackle these usability and efficiency challenges directly.},
  isbn      = {978-3-030-12079-5},
  url       = {https://link.springer.com/chapter/10.1007/978-3-030-12079-5_4},
  doi       = {10.1007/978-3-030-12079-5_4}
}
@inproceedings{Raasveldt2018FBC32099503209955,
  author    = {Raasveldt, Mark and Holanda, Pedro and Gubner, Tim and M{\"u}hleisen, Hannes},
  title     = {Fair Benchmarking Considered Difficult: Common Pitfalls In Database Performance Testing},
  booktitle = {Proceedings of the Workshop on Testing Database Systems},
  series    = {DBTest'18},
  year      = {2018},
  isbn      = {978-1-4503-5826-2},
  location  = {Houston, TX, USA},
  pages     = {2:1--2:6},
  articleno = {2},
  numpages  = {6},
  url       = {http://doi.acm.org/10.1145/3209950.3209955},
  acmid     = {3209955},
  publisher = {ACM},
  address   = {New York, NY, USA},
  keywords  = {Benchmarking, Performance Evaluation},
  doi       = {10.1145/3209950.3209955}
}
@inproceedings{DBLPconfsigmodKerstenKZ18,
  author    = {Kersten, Martin L. and Koutsourakis, Panagiotis and Zhang, Ying},
  editor    = {B{\"{o}}hm, Alexander and Rabl, Tilmann},
  title     = {Finding the Pitfalls in Query Performance},
  booktitle = {Proceedings of the 7th {I}nternational {W}orkshop on {T}esting {D}atabase {S}ystems, DBTest@SIGMOD 2018},
  pages     = {3:1--3:6},
  publisher = {{ACM}},
  year      = {2018},
  url       = {https://doi.org/10.1145/3209950.3209951},
  timestamp = {Mon, 12 Aug 2019 13:49:51 +0200},
  biburl    = {https://dblp.org/rec/conf/sigmod/KerstenKZ18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  doi       = {10.1145/3209950.3209951},
}
@software{reback2020pandas,
  author    = {{The pandas development team}},
  title     = {pandas-dev/pandas: Pandas},
  month     = feb,
  year      = {2020},
  publisher = {Zenodo},
  version   = {latest},
  doi       = {10.5281/zenodo.3509134},
  url       = {https://doi.org/10.5281/zenodo.3509134}
}
@inproceedings{mckinney-proc-scipy-2010,
  author    = {McKinney, Wes},
  title     = {Data Structures for Statistical Computing in {Python}},
  booktitle = {Proceedings of the 9th {Python in Science Conference}},
  pages     = {56--61},
  year      = {2010},
  editor    = {van der Walt, St{\'e}fan and Millman, Jarrod},
  doi       = {10.25080/Majora-92bf1922-00a}
}
@misc{208870,
  author    = {Rabenstein, Bjorn and Volz, Julius},
  title     = {Prometheus: A Next-Generation Monitoring System (Talk)},
  year      = {2015},
  address   = {Dublin},
  publisher = {{USENIX} Association},
  month     = may,
}
@inproceedings{Kluyver2016jupyter,
  title        = {{Jupyter} Notebooks -- a publishing format for reproducible computational workflows},
  author       = {Kluyver, Thomas and Ragan-Kelley, Benjamin and P{\'e}rez, Fernando and Granger, Brian and Bussonnier, Matthias and Frederic, Jonathan and Kelley, Kyle and Hamrick, Jessica and Grout, Jason and Corlay, Sylvain and Ivanov, Paul and Avila, Dami{\'a}n and Abdalla, Safia and Willing, Carol},
  booktitle    = {Positioning and Power in Academic Publishing: Players, Agents and Agendas},
  editor       = {Loizides, F. and Schmidt, B.},
  organization = {IOS Press},
  pages        = {87--90},
  year         = {2016},
  doi          = {10.3233/978-1-61499-649-1-87}
}
@article{Hunter:2007,
  author    = {Hunter, J. D.},
  title     = {{Matplotlib}: A {2D} graphics environment},
  journal   = {Computing in Science \& Engineering},
  volume    = {9},
  number    = {3},
  pages     = {90--95},
  abstract  = {Matplotlib is a 2D graphics package used for Python for application development, interactive scripting, and publication-quality image generation across user interfaces and operating systems.},
  publisher = {IEEE COMPUTER SOC},
  doi       = {10.1109/MCSE.2007.55},
  year      = {2007}
}
@article{2020SciPy-NMeth,
  author  = {Virtanen, Pauli and Gommers, Ralf and Oliphant, Travis E. and
             Haberland, Matt and Reddy, Tyler and Cournapeau, David and
             Burovski, Evgeni and Peterson, Pearu and Weckesser, Warren and
             Bright, Jonathan and {van der Walt}, St{\'e}fan J. and
             Brett, Matthew and Wilson, Joshua and Millman, K. Jarrod and
             Mayorov, Nikolay and Nelson, Andrew R. J. and Jones, Eric and
             Kern, Robert and Larson, Eric and Carey, C J and
             Polat, {\.I}lhan and Feng, Yu and Moore, Eric W. and
             {VanderPlas}, Jake and Laxalde, Denis and Perktold, Josef and
             Cimrman, Robert and Henriksen, Ian and Quintero, E. A. and
             Harris, Charles R. and Archibald, Anne M. and
             Ribeiro, Ant{\^o}nio H. and Pedregosa, Fabian and
             {van Mulbregt}, Paul and {SciPy 1.0 Contributors}},
  title   = {{SciPy} 1.0: Fundamental Algorithms for Scientific Computing in {Python}},
  journal = {Nature Methods},
  year    = {2020},
  volume  = {17},
  pages   = {261--272},
  adsurl  = {https://rdcu.be/b08Wh},
  doi     = {10.1038/s41592-019-0686-2},
}
@book{KounevLK20,
  author    = {Kounev, Samuel and Lange, Klaus{-}Dieter and von Kistowski, J{\'{o}}akim},
  title     = {Systems Benchmarking - For Scientists and Engineers},
  publisher = {Springer},
  year      = {2020},
  url       = {https://doi.org/10.1007/978-3-030-41705-5},
  doi       = {10.1007/978-3-030-41705-5},
  isbn      = {978-3-030-41704-8},
  timestamp = {Tue, 08 Sep 2020 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/books/sp/KounevLK20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@book{series/utcs/IgualS17,
  author    = {Igual, Laura and Segu{\'i}, Santi},
  title     = {Introduction to Data Science - A {P}ython Approach to Concepts, Techniques and Applications},
  series    = {Undergraduate Topics in Computer Science},
  pages     = {1--215},
  publisher = {Springer},
  year      = {2017},
  isbn      = {978-3-319-50017-1},
  doi       = {10.1007/978-3-319-50017-1},
  ee        = {https://doi.org/10.1007/978-3-319-50017-1},
  added-at  = {2018-11-02T00:00:00.000+0100},
  biburl    = {https://www.bibsonomy.org/bibtex/2df957256a471cf3626c5a280424a2b9a/dblp},
  interhash = {1f6d348e064db5ee3a118549b35cbb04},
  intrahash = {df957256a471cf3626c5a280424a2b9a},
  keywords  = {dblp},
  timestamp = {2018-11-03T12:45:44.000+0100}
}
@article{Waskom2021,
  author    = {Waskom, Michael L.},
  title     = {seaborn: statistical data visualization},
  journal   = {Journal of Open Source Software},
  year      = {2021},
  volume    = {6},
  number    = {60},
  pages     = {3021},
  publisher = {The Open Journal},
  doi       = {10.21105/joss.03021},
  url       = {https://doi.org/10.21105/joss.03021}
}
@misc{TIOBE,
  author = {{TIOBE}},
  title  = {{TIOBE Index - TIOBE}},
  year   = {2022},
  month  = jun,
  note   = {[Online; accessed 31. Jul. 2022]},
  url    = {https://www.tiobe.com/tiobe-index}
}
@misc{PYPL,
  author = {{PYPL}},
  title  = {{PYPL PopularitY of Programming Language index}},
  year   = {2022},
  month  = jul,
  note   = {[Online; accessed 31. Jul. 2022]},
  url    = {https://pypl.github.io/PYPL.html}
}
@inproceedings{10114533389063338912,
  author    = {He, Sen and Manns, Glenna and Saunders, John and Wang, Wei and Pollock, Lori and Soffa, Mary Lou},
  title     = {A Statistics-Based Performance Testing Methodology for Cloud Applications},
  year      = {2019},
  isbn      = {9781450355728},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  url       = {https://doi.org/10.1145/3338906.3338912},
  booktitle = {Proceedings of the 2019 27th ACM Joint Meeting on European Software Engineering Conference and Symposium on the Foundations of Software Engineering},
  pages     = {188--199},
  numpages  = {12},
  keywords  = {cloud computing, resource contention, performance testing, non-parametric statistics},
  location  = {Tallinn, Estonia},
  series    = {ESEC/FSE 2019},
  doi       = {10.1145/3338906.3338912},
}
@misc{DBDBIO,
  author = {{Carnegie Mellon Database Group}},
  title  = {{Database of Databases}},
  year   = {2022},
  month  = aug,
  note   = {[Online; accessed 1. Aug. 2022]},
  url    = {https://dbdb.io}
}
@misc{DBEngines,
  author = {{solid IT GmbH}},
  title  = {{DB-Engines Ranking}},
  year   = {2022},
  month  = aug,
  note   = {[Online; accessed 1. Aug. 2022]},
  url    = {https://db-engines.com/en/ranking}
}