diff --git a/DESCRIPTION b/DESCRIPTION index 2dd0299..c1dac0f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: openVA Type: Package Title: Automated Method for Verbal Autopsy -Version: 1.1.1 -Date: 2023-03-17 +Version: 1.1.2 +Date: 2024-01-28 Author: Zehang Richard Li, Jason Thomas, Tyler H. McCormick, Samuel J. Clark Maintainer: Zehang Richard Li Depends: R (>= 3.1) @@ -17,7 +17,7 @@ Description: Implements multiple existing open-source algorithms for coding caus License: GPL-2 URL: https://github.com/verbal-autopsy-software/openVA BugReports: https://github.com/verbal-autopsy-software/openVA/issues -RoxygenNote: 7.2.1 +RoxygenNote: 7.2.3 VignetteBuilder: R.rsp, knitr NeedsCompilation: no Config/build/clean-inst-doc: FALSE diff --git a/NEWS.md b/NEWS.md index 8994b46..88412f6 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,8 @@ # openVA - changes +Version 1.1.2 (2024-01-28) +========================== ++ Update information on downloading PHMRC data with user registration on their website. + Version 1.1.1 (2023-03-17) ========================== + Satisfy new CRAN requirement changes. @@ -17,7 +21,7 @@ Version 1.0.14 (2022-03-11) + Minor code improvements checking the input data format. 
+ Improved `getCSMF_accuracy()` function with stricter input + New function to compute overall chance-corrected concordance `getCCC()` -+ Updated `getTopCOD` function with parameter to return top n causes and correspoonding probabilities (or values for how likely the cause is for InterVA) ++ Updated `getTopCOD` function with parameter to return top n causes and corresponding probabilities (or values for how likely the cause is for InterVA) Version 1.0.13 (2021-09-16) ========================== diff --git a/R/ConvertData.r b/R/ConvertData.r index e1b9fb6..3f41f21 100644 --- a/R/ConvertData.r +++ b/R/ConvertData.r @@ -79,21 +79,28 @@ ConvertData <- function(input, yesLabel = NULL, noLabel = NULL, missLabel = NULL #' @export getPHMRC_url #' #' @examples -#' link <- getPHMRC_url("adult") -#' summary(link)$description +#' getPHMRC_url("adult") +#' #' getPHMRC_url <- function(type){ + message( + "****************************************\n +Starting from January 2024, the PHMRC dataset requires user registration to download. As the result of the change, directly reading the CSV file within R using the link does not work any more.\n +Please go to the following link to register or log in to your account, and click the Files tab to download the corresponding CSV files. 
Then you can use 'read.csv' function to load the dataset into R\n +Download Link:\n https://ghdx.healthdata.org/record/ihme-data/population-health-metrics-research-consortium-gold-standard-verbal-autopsy-data-2005-2011 +\n****************************************\n + ") + return(NULL) if(type == "adult"){ - return(url('http://ghdx.healthdata.org/sites/default/files/record-attached-files/IHME_PHMRC_VA_DATA_ADULT_Y2013M09D11_0.csv')) + return(('http://ghdx.healthdata.org/sites/default/files/record-attached-files/IHME_PHMRC_VA_DATA_ADULT_Y2013M09D11_0.csv')) }else if(type == "child"){ - return(url('http://ghdx.healthdata.org/sites/default/files/record-attached-files/IHME_PHMRC_VA_DATA_CHILD_Y2013M09D11_0.csv')) + return(('http://ghdx.healthdata.org/sites/default/files/record-attached-files/IHME_PHMRC_VA_DATA_CHILD_Y2013M09D11_0.csv')) }else if(type == "neonate"){ - return(url('http://ghdx.healthdata.org/sites/default/files/record-attached-files/IHME_PHMRC_VA_DATA_NEONATE_Y2013M09D11_0.csv')) + return(('http://ghdx.healthdata.org/sites/default/files/record-attached-files/IHME_PHMRC_VA_DATA_NEONATE_Y2013M09D11_0.csv')) }else{ stop("Unknown type") } - } #' Convert standard PHMRC data into binary indicator format #' @@ -111,13 +118,14 @@ getPHMRC_url <- function(type){ #' @export ConvertData.phmrc #' @references James, S. L., Flaxman, A. D., Murray, C. J., & Population Health Metrics Research Consortium. (2011). \emph{Performance of the Tariff Method: validation of a simple additive algorithm for analysis of verbal autopsies. } \emph{Population Health Metrics, 9(1), 1-16.} #' @examples -#' \donttest{ -#' # read the raw data files from PHMRC website -#' # notice reading directly from internet could be time consuming -#' # so we only read 100 rows here. -#' # in practice, it is much easier and faster to download the file first, -#' # and read all at once. 
-#' raw <- read.csv(getPHMRC_url("adult"), nrows = 100) +#' \dontrun{ +#' # Starting from Jan 2024, PHMRC data requires registration at the GHDx website +#' # to download. The following commands assume the user has downloaded the file for +#' # PHMRC VA adult data from the website after logging in. +#' +#' # For more details on the download process, see ?getPHMRC_url. +#' +#' raw <- read.csv("IHME_PHMRC_VA_DATA_ADULT_Y2013M09D11_0.csv", nrows = 100) #' head(raw[, 1:20]) #' # default way of conversion #' clean <- ConvertData.phmrc(raw, phmrc.type = "adult") @@ -129,7 +137,7 @@ getPHMRC_url <- function(type){ #' #' # Now using the first 100 rows of data as training dataset #' # And the next 100 as testing dataset -#' test <- read.csv(getPHMRC_url("adult"), nrows = 200) +#' test <- read.csv("IHME_PHMRC_VA_DATA_ADULT_Y2013M09D11_0.csv", nrows = 200) #' test <- test[-(1:100), ] #' #' # For the default transformation it does matter diff --git a/man/ConvertData.phmrc.Rd b/man/ConvertData.phmrc.Rd index e559d46..3460205 100644 --- a/man/ConvertData.phmrc.Rd +++ b/man/ConvertData.phmrc.Rd @@ -33,13 +33,14 @@ converted dataset with only ID and binary symptoms. Notice that when applying th The PHMRC data and the description of the format could be found at \url{https://ghdx.healthdata.org/record/ihme-data/population-health-metrics-research-consortium-gold-standard-verbal-autopsy-data-2005-2011}. This function convert the symptoms into binary indicators of three levels: Yes, No, and Missing. The health care experience (HCE) and free-text columns, i.e., columns named "word_****", are not considered in the current version of data conversion. } \examples{ -\donttest{ -# read the raw data files from PHMRC website -# notice reading directly from internet could be time consuming -# so we only read 100 rows here. -# in practice, it is much easier and faster to download the file first, -# and read all at once. 
-raw <- read.csv(getPHMRC_url("adult"), nrows = 100) +\dontrun{ +# Starting from Jan 2024, PHMRC data requires registration at the GHDx website +# to download. The following commands assume the user has downloaded the file for +# PHMRC VA adult data from the website after logging in. + +# For more details on the download process, see ?getPHMRC_url. + +raw <- read.csv("IHME_PHMRC_VA_DATA_ADULT_Y2013M09D11_0.csv", nrows = 100) head(raw[, 1:20]) # default way of conversion clean <- ConvertData.phmrc(raw, phmrc.type = "adult") @@ -51,7 +52,7 @@ head(clean2$output[, 1:20]) # Now using the first 100 rows of data as training dataset # And the next 100 as testing dataset -test <- read.csv(getPHMRC_url("adult"), nrows = 200) +test <- read.csv("IHME_PHMRC_VA_DATA_ADULT_Y2013M09D11_0.csv", nrows = 200) test <- test[-(1:100), ] # For the default transformation it does matter diff --git a/man/getPHMRC_url.Rd b/man/getPHMRC_url.Rd index ab3418e..0acd07b 100644 --- a/man/getPHMRC_url.Rd +++ b/man/getPHMRC_url.Rd @@ -16,7 +16,7 @@ URL of the corresponding dataset Get the URL to the PHMRC dataset } \examples{ -link <- getPHMRC_url("adult") -summary(link)$description +getPHMRC_url("adult") + } diff --git a/tests/testthat/tests.convertdata.r b/tests/testthat/tests.convertdata.r index bf4fb9e..fdcab7c 100644 --- a/tests/testthat/tests.convertdata.r +++ b/tests/testthat/tests.convertdata.r @@ -35,55 +35,3 @@ test_that("ConvertData - ConvertData 2016", { expect_equal(new_data_2016[2, 2], "n") expect_equal(new_data_2016[3, 2], "-") }) - -# getPHMRC_url() - -phmrc_url_adult = getPHMRC_url("adult") -phmrc_url_child = getPHMRC_url("child") -phmrc_url_neonate = getPHMRC_url("neonate") -phmrc_adult <- read.csv(phmrc_url_adult) -phmrc_child <- read.csv(phmrc_url_child) -phmrc_neonate <- read.csv(phmrc_url_neonate) - -test_that("ConvertData - getPHMRC_url", { - expect_s3_class(phmrc_url_adult, "url") - expect_s3_class(phmrc_url_child, "url") - expect_s3_class(phmrc_url_neonate, "url") - 
expect_error(getPHMRC_url("males")) - expect_equal("adult", tolower(phmrc_adult$module[1])) - expect_equal("child", tolower(phmrc_child$module[1])) - expect_equal("neonate", tolower(phmrc_neonate$module[1])) -}) - -# ConvertData.phmrc() - -converted_adult <- ConvertData.phmrc(phmrc_adult, phmrc.type = "adult") -converted_child <- ConvertData.phmrc(phmrc_child, phmrc.type = "child") -#converted_neonate <- ConvertData.phmrc(phmrc_neonate, phmrc.type = "neonate") - -test_that("ConvertData - ConvertData.phmrc", { - expect_s3_class(converted_adult$output, "data.frame") - expect_s3_class(converted_child$output, "data.frame") - #expect_s3_class(converted_neonate$output, "data.frame") - - # Adult - expect_equal(converted_adult$output$a2_02[phmrc_adult$a2_02 == "Yes"], - rep("Y", sum(phmrc_adult$a2_02 == "Yes"))) - expect_equal(converted_adult$output$a2_02[phmrc_adult$a2_02 == "No"], - rep("", sum(phmrc_adult$a2_02 == "No"))) - expect_equal( - converted_adult$output$a2_02[phmrc_adult$a2_02 == "Don't Know" | - phmrc_adult$a2_02 == "Refused to Answer"], - rep(".", sum(phmrc_adult$a2_02 == "Don't Know" | - phmrc_adult$a2_02 == "Refused to Answer"))) - # Child - expect_equal(converted_child$output$c5_10[phmrc_child$c5_10 == "Yes"], - rep("Y", sum(phmrc_child$c5_10 == "Yes"))) - expect_equal(converted_child$output$c5_10[phmrc_child$c5_10 == "No"], - rep("", sum(phmrc_child$c5_10 == "No"))) - expect_equal( - converted_child$output$c5_10[phmrc_child$c5_10 == "Don't Know" | - phmrc_child$c5_10 == "Refused to Answer"], - rep(".", sum(phmrc_child$c5_10 == "Don't Know" | - phmrc_child$c5_10 == "Refused to Answer"))) -}) diff --git a/tests/testthat/testthat-problems.rds b/tests/testthat/testthat-problems.rds new file mode 100644 index 0000000..d993fcb Binary files /dev/null and b/tests/testthat/testthat-problems.rds differ