This package curates (downloads, cleans, consolidates, smooths) data from Johns Hopkins and Our World in Data for analysing the international outbreak of COVID-19.
It includes several visualizations of the COVID-19 international outbreak.
- COVID19DataProcessor generates curated series
- Visualizations by Yanchang Zhao are included in the ReportGenerator R6 object
- More visualizations are included in the ReportGeneratorEnhanced R6 object
- The ReportGeneratorDataComparison visualizations compare all countries, counting epidemic day 0 as the day when confirmed cases > n (e.g. n = 100).
Release | Usage | Development |
---|---|---|
Install the R package using the following commands on the R console:
# install.packages("devtools")
devtools::install_github("rOpenStats/COVID19analytics", build_opts = NULL)
First, configure the environment variables with your preferred
settings in ~/.Renviron. COVID19analytics_data_dir is mandatory,
while COVID19analytics_credits can be set to a space-separated list of
aliases if you want to publish your own research. Mention previous authors
where appropriate.
COVID19analytics_data_dir = "~/.R/COVID19analytics"
# If you want to generate your own reports
COVID19analytics_credits = "@alias1 @alias2 @aliasn"
library(COVID19analytics)
#> Warning: replacing previous import 'ggplot2::Layout' by 'lgr::Layout' when
#> loading 'COVID19analytics'
#> Warning: replacing previous import 'readr::col_factor' by 'scales::col_factor'
#> when loading 'COVID19analytics'
#> Warning: replacing previous import 'magrittr::not' by 'testthat::not' when
#> loading 'COVID19analytics'
#> Warning: replacing previous import 'dplyr::matches' by 'testthat::matches' when
#> loading 'COVID19analytics'
#> Warning: replacing previous import 'readr::edition_get' by
#> 'testthat::edition_get' when loading 'COVID19analytics'
#> Warning: replacing previous import 'magrittr::equals' by 'testthat::equals' when
#> loading 'COVID19analytics'
#> Warning: replacing previous import 'magrittr::is_less_than' by
#> 'testthat::is_less_than' when loading 'COVID19analytics'
#> Warning: replacing previous import 'readr::local_edition' by
#> 'testthat::local_edition' when loading 'COVID19analytics'
#> Warning: replacing previous import 'testthat::matches' by 'tidyr::matches' when
#> loading 'COVID19analytics'
#> Warning: replacing previous import 'magrittr::extract' by 'tidyr::extract' when
#> loading 'COVID19analytics'
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(knitr)
library(lgr)
log.dir <- file.path(getEnv("data_dir"), "logs")
dir.create(log.dir, recursive = TRUE, showWarnings = FALSE)
log.file <- file.path(log.dir, "covid19analytics.log")
lgr::get_logger("root")$add_appender(AppenderFile$new(log.file))
lgr::threshold("info", lgr::get_logger("root"))
lgr::threshold("info", lgr::get_logger("COVID19ARCurator"))
data.processor <- COVID19DataProcessor$new(provider = "JohnsHopkingsUniversity", missing.values = "imputation")
# dummy <- data.processor$preprocess() is equivalent to setupData() + transform(): the preprocessing made by the data provider
dummy <- data.processor$setupData()
#> INFO [09:12:34.000] {stage: `processor-setup`}
#> INFO [09:12:34.116] Checking required downloaded {downloaded.max.date: `2021-09-26`, daily.update.time: `21:00:00`, current.datetime: `2021-09-28 09:12:34`, download.flag: `TRUE`}
#> INFO [09:12:35.748] Checking required downloaded {downloaded.max.date: `2021-09-26`, daily.update.time: `21:00:00`, current.datetime: `2021-09-28 09:12:35`, download.flag: `TRUE`}
#> INFO [09:12:36.912] Checking required downloaded {downloaded.max.date: `2021-09-26`, daily.update.time: `21:00:00`, current.datetime: `2021-09-28 09:12:36`, download.flag: `TRUE`}
#> INFO [09:12:37.996] {stage: `data loaded`}
#> INFO [09:12:37.998] {stage: `data-setup`}
dummy <- data.processor$transform()
#> INFO [09:12:38.001] Executing transform
#> INFO [09:12:38.003] Executing consolidate
#> INFO [09:12:59.462] {stage: `consolidated`}
#> INFO [09:12:59.466] Executing standarize
#> INFO [09:13:02.569] gathering DataModel
#> INFO [09:13:02.571] {stage: `datamodel-setup`}
# Curate is the processing performed by the missing-values method
dummy <- data.processor$curate()
#> INFO [09:13:02.580] {stage: `loading-aggregated-data-model`}
#> Warning in countrycode_convert(sourcevar = sourcevar, origin = origin, destination = dest, : Some values were not matched unambiguously: Micronesia
#> Warning in countrycode_convert(sourcevar = sourcevar, origin = origin, destination = dest, : Some values were not matched unambiguously: MS Zaandam
#> Warning in countrycode_convert(sourcevar = sourcevar, origin = origin, destination = dest, : Some values were not matched unambiguously: Summer Olympics 2020
#> INFO [09:13:10.117] {stage: `calculating-rates`}
#> INFO [09:13:10.346] {stage: `making-data-comparison`}
#> INFO [09:13:19.438] {stage: `applying-missing-values-method`}
#> INFO [09:13:19.440] {stage: `Starting first imputation`}
#> INFO [09:13:19.451] {stage: `calculating-rates`}
#> INFO [09:13:19.776] {stage: `making-data-comparison-2`}
#> INFO [09:13:29.486] {stage: `calculating-top-countries`}
#> INFO [09:13:29.509] {stage: `curated`}
current.date <- max(data.processor$getData()$date)
rg <- ReportGeneratorEnhanced$new(data.processor)
rc <- ReportGeneratorDataComparison$new(data.processor = data.processor)
top.countries <- data.processor$top.countries
international.countries <- unique(c(data.processor$top.countries,
"China", "Japan", "Singapore", "Korea, South"))
latam.countries <- sort(c("Mexico",
data.processor$countries$getCountries(division = "sub.continent", name = "Caribbean"),
data.processor$countries$getCountries(division = "sub.continent", name = "Central America"),
data.processor$countries$getCountries(division = "sub.continent", name = "South America")))
# Top 10 daily cases confirmed increment
kable((data.processor$getData() %>%
filter(date == current.date) %>%
select(country, date, rate.inc.daily, confirmed.inc, confirmed, deaths, deaths.inc) %>%
arrange(desc(confirmed.inc)) %>%
filter(confirmed >=10))[1:10,])
country | date | rate.inc.daily | confirmed.inc | confirmed | deaths | deaths.inc |
---|---|---|---|---|---|---|
US | 2021-09-27 | 0.0043 | 185088 | 43116442 | 690426 | 2394 |
United Kingdom | 2021-09-27 | 0.0049 | 37583 | 7737941 | 136569 | 40 |
Turkey | 2021-09-27 | 0.0039 | 27188 | 7066658 | 63372 | 206 |
Russia | 2021-09-27 | 0.0030 | 21731 | 7334843 | 201015 | 770 |
India | 2021-09-27 | 0.0006 | 18795 | 33697581 | 447373 | 179 |
Iran | 2021-09-27 | 0.0026 | 14470 | 5547990 | 119649 | 289 |
Brazil | 2021-09-27 | 0.0007 | 14423 | 21366395 | 594653 | 210 |
Malaysia | 2021-09-27 | 0.0050 | 10959 | 2209194 | 25695 | 258 |
Thailand | 2021-09-27 | 0.0066 | 10288 | 1571926 | 16369 | 101 |
Vietnam | 2021-09-27 | 0.0124 | 9362 | 766051 | 18758 | 174 |
# Top 10 daily deaths increment
kable((data.processor$getData() %>%
filter(date == current.date) %>%
select(country, date, rate.inc.daily, confirmed.inc, confirmed, deaths, deaths.inc) %>%
arrange(desc(deaths.inc)))[1:10,])
country | date | rate.inc.daily | confirmed.inc | confirmed | deaths | deaths.inc |
---|---|---|---|---|---|---|
US | 2021-09-27 | 0.0043 | 185088 | 43116442 | 690426 | 2394 |
Russia | 2021-09-27 | 0.0030 | 21731 | 7334843 | 201015 | 770 |
France | 2021-09-27 | 0.0002 | 1503 | 7087110 | 117581 | 399 |
Kazakhstan | 2021-09-27 | 0.0018 | 1730 | 956025 | 15865 | 362 |
Iran | 2021-09-27 | 0.0026 | 14470 | 5547990 | 119649 | 289 |
Malaysia | 2021-09-27 | 0.0050 | 10959 | 2209194 | 25695 | 258 |
Mexico | 2021-09-27 | 0.0008 | 3007 | 3635807 | 275676 | 226 |
Brazil | 2021-09-27 | 0.0007 | 14423 | 21366395 | 594653 | 210 |
Turkey | 2021-09-27 | 0.0039 | 27188 | 7066658 | 63372 | 206 |
India | 2021-09-27 | 0.0006 | 18795 | 33697581 | 447373 | 179 |
rg$ggplotTopCountriesStackedBarDailyInc(included.countries = latam.countries, countries.text = "Latam countries")
#> Warning: Removed 144 rows containing missing values (position_stack).
rc$ggplotComparisonExponentialGrowth(included.countries = latam.countries, countries.text = "Latam countries",
field = "confirmed", y.label = "Confirmed", min.cases = 100)
#> Warning: ggrepel: 6 unlabeled data points (too many overlaps). Consider
#> increasing max.overlaps
rc$ggplotComparisonExponentialGrowth(included.countries = latam.countries, countries.text = "Latam countries",
field = "remaining.confirmed", y.label = "Active cases", min.cases = 100)
#> Warning in self$trans$transform(x): NaNs produced
#> Warning: Transformation introduced infinite values in continuous y-axis
#> Warning: ggrepel: 4 unlabeled data points (too many overlaps). Consider
#> increasing max.overlaps
rc$ggplotComparisonExponentialGrowth(included.countries = latam.countries, field = "deaths", y.label = "Deaths", min.cases = 1)
rg$ggplotCrossSection(included.countries = latam.countries,
field.x = "confirmed",
field.y = "fatality.rate.max",
plot.description = "Cross section Confirmed vs Death rate min",
log.scale.x = TRUE,
log.scale.y = FALSE)
#> Warning: Removed 144 row(s) containing missing values (geom_path).
rg$ggplotCountriesLines(included.countries = latam.countries, countries.text = "Latam countries",
field = "confirmed.inc", log.scale = TRUE)
#> Warning: Removed 144 row(s) containing missing values (geom_path).
rg$ggplotCountriesLines(included.countries = latam.countries, countries.text = "Latam countries",
field = "deaths.inc", log.scale = TRUE)
#> Warning in self$trans$transform(x): NaNs produced
#> Warning: Transformation introduced infinite values in continuous y-axis
#> Warning in self$trans$transform(x): NaNs produced
#> Warning: Transformation introduced infinite values in continuous y-axis
#> Warning: Removed 5 rows containing missing values (geom_point).
#> Warning: Removed 144 row(s) containing missing values (geom_path).
#> Warning: ggrepel: 7 unlabeled data points (too many overlaps). Consider
#> increasing max.overlaps
rg$ggplotCountriesLines(included.countries = latam.countries, countries.text = "Latam countries",
field = "rate.inc.daily", log.scale = TRUE)
#> Warning: Transformation introduced infinite values in continuous y-axis
#> Warning: Removed 144 row(s) containing missing values (geom_path).
rg$ggplotTopCountriesStackedBarDailyInc(top.countries)
#> Warning: Removed 67 rows containing missing values (position_stack).
rc$ggplotComparisonExponentialGrowth(included.countries = international.countries,
field = "confirmed", y.label = "Confirmed", min.cases = 100)
#> Warning: Removed 2 row(s) containing missing values (geom_path).
#> Warning: ggrepel: 4 unlabeled data points (too many overlaps). Consider
#> increasing max.overlaps
rc$ggplotComparisonExponentialGrowth(included.countries = international.countries,
field = "remaining.confirmed", y.label = "Active cases", min.cases = 100)
#> Warning: Removed 2 row(s) containing missing values (geom_path).
#> Warning: ggrepel: 3 unlabeled data points (too many overlaps). Consider
#> increasing max.overlaps
rc$ggplotComparisonExponentialGrowth(included.countries = international.countries, field = "deaths",
y.label = "Deaths", min.cases = 1)
#> Warning: Removed 2 row(s) containing missing values (geom_path).
#> Warning: ggrepel: 1 unlabeled data points (too many overlaps). Consider
#> increasing max.overlaps
rg$ggplotCrossSection(included.countries = international.countries,
field.x = "confirmed",
field.y = "fatality.rate.max",
plot.description = "Cross section Confirmed vs Death rate min",
log.scale.x = TRUE,
log.scale.y = FALSE)
#> Warning: Removed 90 row(s) containing missing values (geom_path).
rg$ggplotCountriesLines(field = "confirmed.inc", log.scale = TRUE)
#> Warning: Removed 66 row(s) containing missing values (geom_path).
rg$ggplotCountriesLines(field = "deaths.inc", log.scale = TRUE)
#> Warning in self$trans$transform(x): NaNs produced
#> Warning: Transformation introduced infinite values in continuous y-axis
#> Warning: Transformation introduced infinite values in continuous y-axis
#> Warning: Removed 7 rows containing missing values (geom_point).
#> Warning: Removed 66 row(s) containing missing values (geom_path).
rg$ggplotCountriesLines(field = "rate.inc.daily", log.scale = TRUE)
#> Warning: Transformation introduced infinite values in continuous y-axis
#> Warning: Removed 66 row(s) containing missing values (geom_path).
rg$ggplotTopCountriesPie()
rg$ggplotTopCountriesBarPlots()
rg$ggplotCountriesBarGraphs(selected.country = "Argentina")
- Johns Hopkins University. Retrieved from: ‘https://github.com/CSSEGISandData/COVID-19/’ [Online Resource]
- OurWorldInData.org. Retrieved from: ‘https://ourworldindata.org/coronavirus’ [Online Resource]
- Yanchang Zhao, COVID-19 Data Analysis with Tidyverse and Ggplot2 - China. RDataMining.com, 2020.
  URL: http://www.rdatamining.com/docs/Coronavirus-data-analysis-china.pdf.