getwilds · seankross · Sep 24, 2024 · Sep 25, 2024
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: cancerprof
 Title: API Client for State Cancer Profiles
-Version: 0.1.0
+Version: 0.1.0.9001
 Authors@R: 
     person("Brian", "Park", , "[email protected]", role = c("aut", "cre"),
            comment = c(ORCID = "0009-0008-8274-3057"))
@@ -10,7 +10,7 @@ URL: https://github.com/getwilds/cancerprof, https://getwilds.org/cancerprof/
 BugReports: https://github.com/getwilds/cancerprof/issues
 Encoding: UTF-8
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.3.1
+RoxygenNote: 7.3.2
 Imports: 
     cdlTools,
     cli,
@@ -20,6 +20,7 @@ Imports:
     rlang,
     stringr,
     tibble,
+    tidyr,
     utils
 Suggests: 
     knitr,

diff --git a/NAMESPACE b/NAMESPACE
@@ -38,6 +38,8 @@ importFrom(dplyr,filter)
 importFrom(dplyr,mutate)
 importFrom(dplyr,mutate_all)
 importFrom(dplyr,na_if)
+importFrom(dplyr,rename)
+importFrom(dplyr,select)
 importFrom(httr2,req_perform)
 importFrom(httr2,req_url_path_append)
 importFrom(httr2,req_url_query)
@@ -50,5 +52,6 @@ importFrom(stats,setNames)
 importFrom(stringr,str_pad)
 importFrom(stringr,str_trim)
 importFrom(tibble,as_tibble)
+importFrom(tidyr,separate_wider_regex)
 importFrom(utils,data)
 importFrom(utils,read.csv)
diff --git a/R/incidence-cancer.R b/R/incidence-cancer.R
@@ -148,6 +148,7 @@ incidence_cancer <- function(area, areatype, cancer, race, sex, age, stage, year
       areatype = tolower(areatype),
       cancer = handle_cancer(cancer),
       race = handle_race(race),
+      sex = handle_sex(sex),
       age = handle_age(age),
       stage = handle_stage(stage),
       year = handle_year(year),
@@ -157,11 +158,6 @@ incidence_cancer <- function(area, areatype, cancer, race, sex, age, stage, year
       output = 1
     )
 
-  if (!is.null(sex)) {
-    req <- req %>%
-      req_url_query(sex = handle_sex(sex))
-  }
-
   resp <- req_perform(req)
   resp_url <- resp$url
   resp <- process_resp(resp, "incidence")
@@ -178,13 +174,15 @@ incidence_cancer <- function(area, areatype, cancer, race, sex, age, stage, year
   if (stage == "all stages") {
     resp$data <- resp$data %>%
       setNames(c(
-        get_area(areatype),
+        unname(get_area(areatype)),
+        "USDA_Code",
         shared_names_to_numeric,
         "Annual_Average_Count",
         "Recent_Trend",
         "Recent_5_Year_Trend",
         "Trend_Lower_95%_CI",
-        "Trend_Upper_95%_CI"
+        "Trend_Upper_95%_CI",
+        "Citation"
       )) %>%
       mutate(across(c(
         all_of(shared_names_to_numeric),

diff --git a/R/process-resp.R b/R/process-resp.R
@@ -10,11 +10,12 @@
 #' - "mortality"
 #'
 #' @importFrom httr2 resp_body_string
-#' @importFrom dplyr mutate_all na_if filter
+#' @importFrom dplyr mutate_all na_if filter select rename
 #' @importFrom rlang sym
 #' @importFrom utils read.csv data
 #' @importFrom stringr str_trim
 #' @importFrom tibble as_tibble
+#' @importFrom tidyr separate_wider_regex
 #'
 #' @returns A processed response data frame
 #'
@@ -51,13 +52,11 @@ process_resp <- function(resp, topic) {
     cli_abort("Incorrect topic argument, please ensure that it is correct.")
   }
 
-  resp <- resp_lines[
+  resp_df <- resp_lines[
     (index_first_line_break + 1):(index_second_line_break - 1)
   ] %>%
     paste(collapse = "\n") %>%
-    (\(x) {
-      read.csv(textConnection(x), header = TRUE, colClasses = "character")
-    })()
+    (\(x) read.csv(textConnection(x), header = TRUE, colClasses = "character"))()
 
   column <- c(
     "Health.Service.Area",
@@ -67,16 +66,42 @@ process_resp <- function(resp, topic) {
     "Health.Service.Area",
     "County",
     "State"
-  ) %in% colnames(resp)]
+  ) %in% colnames(resp_df)]
+
+  if (column == "County" && topic == "incidence") {
+    resp_df <- resp_df %>%
+      filter(FIPS != "00000") %>%
+      separate_wider_regex(cols = "County", 
+                           patterns = c("New_County" = "[a-zA-Z\\s]+", 
+                                        "\\(", 
+                                        "Citation" = "[0-9]+", 
+                                        "\\)"),
+                           cols_remove = FALSE) %>%
+      select(-County) %>%
+      rename(County = New_County) %>%
+      select(-Citation, Citation)
+  } else if(column == "Health.Service.Area") {
+    resp_df <- resp_df %>%
+      filter(HSA_Code != "00000") %>%
+      separate_wider_regex(cols = "Health.Service.Area", 
+                           patterns = c(
+                             New_HSA = ".*?(?=\\(\\d+\\)$|$)",
+                             Citation = "\\(\\d+\\)?$"           
+                           ),
+                           cols_remove = FALSE) %>%
+      select(-Health.Service.Area) %>%
+      rename(Health.Service.Area = New_HSA) %>%
+      select(-Citation, Citation)
+  }
 
-  resp <- resp %>%
+  resp_df <- resp_df %>%
     filter(!!sym(column) != "United States")
 
   if (column %in% c("Health.Service.Area", "County")) {
-    resp <- resp %>%
+    resp_df <- resp_df %>%
       filter(!(!!sym(column) %in% state_name))
   }
-  resp <- resp %>%
+  resp_df <- resp_df %>%
     mutate_all(stringr::str_trim) %>%
     mutate_all(\(x) na_if(x, "N/A")) %>%
     mutate_all(\(x) na_if(x, "data not available")) %>%
@@ -86,5 +111,5 @@ process_resp <- function(resp, topic) {
   resp_metadata <- c(
     resp_lines[1: (index_first_line_break - 1)], resp_lines[(index_second_line_break + 1): line_length]
   )
-  list(metadata = resp_metadata, data = resp)
+  list(metadata = resp_metadata, data = resp_df)
 }
diff --git a/man/demo_language.Rd b/man/demo_language.Rd