bird stuff

DOI-USGS · Jan 4, 2017 · ba6288d · ba6288d
1 parent f4697fa
commit ba6288d
Show file tree

Hide file tree

Showing 10 changed files with 54 additions and 103 deletions.
diff --git a/biologicalBP.R b/biologicalBP.R
@@ -12,7 +12,7 @@ graphData <- chemicalSummary %>%
   left_join(select(endPointInfo, endPoint=assay_component_name, intended_target_family),
             by=c("endPoint")) %>%
   select(-category) %>%
-  rename(category = intended_target_family) %>%
+  rename(category = choices) %>%
   group_by(site,date,category) %>%
   summarise(sumEAR=sum(EAR)) %>%
   data.frame() %>%
@@ -64,7 +64,7 @@ bioPlot <- bioPlot +
 
 bioPlot
 
-ggsave(bioPlot, bg = "transparent",
+ggsave(bioPlot, #bg = "transparent",
        filename = "bioPlot.png", 
        height = 4, width = 5)
 
@@ -101,6 +101,6 @@ heat <- ggplot(data = graphData) +
         plot.background = element_rect(fill = "transparent",colour = NA))
 heat
 
-ggsave(heat, bg = "transparent",
+ggsave(heat, #bg = "transparent",
        filename = "heat_Bio.png", 
        height = 7, width = 11)
diff --git a/boxPlotWQ_ACC_simplified.R b/boxPlotWQ_ACC_simplified.R
@@ -141,7 +141,7 @@ fullData$class[fullData$class == "Detergent Metabolites"] <- "Detergent"
 
 x <- graphData[is.na(graphData$category),]  
 
-orderChem <- fullData %>% #not fullFULL...or just graphData....needs just tox and WQ
+orderChem <- graphData %>%#fullData %>% #not fullFULL...or just graphData....needs just tox and WQ
   group_by(category,class) %>%
   summarise(median = quantile(meanEAR[meanEAR != 0],0.5)) %>%
   data.frame() %>%
@@ -150,7 +150,9 @@ orderChem <- fullData %>% #not fullFULL...or just graphData....needs just tox an
 
 orderedLevels <- as.character(orderChem$category)
 orderedLevels <- orderedLevels[!is.na(orderedLevels)]
-orderedLevels <- c(orderedLevels, unique(fullFULL$category)[which(!(unique(fullFULL$category) %in% orderedLevels))],"")
+orderedLevels <- c(orderedLevels[1:2], "Cumene", 
+                   orderedLevels[3:4],"Bromoform",
+                   orderedLevels[5:length(orderedLevels)])
 
 fullFULL <- fullFULL %>%
   mutate(guideline = factor(as.character(guideline), levels=c("ToxCast","Traditional")),
@@ -172,8 +174,8 @@ textData <- data.frame(guideline = factor(c(rep("Traditional", 2),
                                              "Water Quality Guidelines","EEQ",
                                              "Water Quality Guidelines","EEQ"), levels = levels(fullFULL$otherThing)),
                        category = factor(c("2-Methylnaphthalene","1,4-Dichlorobenzene", 
-                                           "Pentachlorophenol","Bisphenol A",
-                                           "Pentachlorophenol","Bisphenol A"), levels = levels(fullFULL$category)),
+                                           "Bisphenol A","Bisphenol A",
+                                           "Bisphenol A","Bisphenol A"), levels = levels(fullFULL$category)),
                        textExplain = c("Water Quality Guidelines Quotients",
                                        "Estradiol Equivalent Quotients",
                                        "A","B","C","D"),

diff --git a/getDataReady.R b/getDataReady.R
@@ -50,21 +50,21 @@ assays <- c("ATG","NVS","OT","TOX21","CEETOX", "APR", #"APR"?,"BSK"?
             "CLD","TANGUAY","NHEERL_PADILLA",
             "NCCT_SIMMONS","ACEA")
 
-ep <- select(endPointInfo, 
-             endPoint = assay_component_endpoint_name,
-             groupCol = intended_target_family,
-             assaysFull = assay_source_name) %>%
-  filter(assaysFull %in% assays) %>%
-  filter(!(groupCol %in% c("background measurement"))) %>% #,"cell morphology", "cell cycle"))) %>%
-  filter(!is.na(groupCol))
-
 cleanUpNames <- endPointInfo$intended_target_family
 cleanUpNames <- stri_trans_totitle(cleanUpNames)
 cleanUpNames[grep("Dna",cleanUpNames)] <- "DNA Binding"
 cleanUpNames[grep("Cyp",cleanUpNames)] <- "CYP"
 cleanUpNames[grep("Gpcr",cleanUpNames)] <- "GPCR"
 endPointInfo$intended_target_family <- cleanUpNames
 
+ep <- select(endPointInfo, 
+             endPoint = assay_component_endpoint_name,
+             groupCol = intended_target_family,
+             assaysFull = assay_source_name) %>%
+  filter(assaysFull %in% assays) %>%
+  filter(!(groupCol %in% c("Background Measurement"))) %>% #,"cell morphology", "cell cycle"))) %>%
+  filter(!is.na(groupCol))
+
 chemicalSummary.orig <- readRDS(file.path(pathToApp,"chemicalSummary_ACC.rds"))
 chemicalSummary.orig$chnm[chemicalSummary.orig$chnm == "4-(1,1,3,3-Tetramethylbutyl)phenol"] <- "4-tert-Octylphenol"
 
@@ -90,7 +90,7 @@ stationINFO$shortName[stationINFO$shortName == "Kalamazoo2"] <- "Kalamazoo"
 stationINFO$shortName[stationINFO$shortName == "Cheboygan2"] <- "Cheboygan"
 
 
-flagsShort <- c("Borderline",  "OneAbove",
+flagsShort <- c("Borderline",  "OnlyHighest",
                 "GainAC50", "Biochemical")
 # flagsShort <- c("Borderline", "OnlyHighest", "OneAbove","Noisy",
 #                 "HitCall", "GainAC50", "Biochemical")
@@ -105,15 +105,6 @@ for(i in flagsShort){
 chemicalSummary <- left_join(chemicalSummary, select(flagDF, casn,endPoint, flags),
                              by=c("casrn"="casn", "endPoint"="endPoint"))
 
-flagSum <- chemicalSummary %>%
-  filter(EAR > 10^-3) %>%
-  filter(!is.na(flags)) %>%
-  group_by(chnm, endPoint, flags) %>%
-  summarise(count = n(),
-            max = max(EAR, na.rm = TRUE)) %>%
-  filter(count > 10) %>%
-  arrange(desc(max))
-
 graphData <- chemicalSummary %>%
   group_by(site,date,category,class) %>%
   summarise(sumEAR=sum(EAR)) %>%

diff --git a/heatMap.R b/heatMap.R
@@ -73,11 +73,11 @@ heat <- ggplot(data = graphData_filtered) +
         plot.background = element_rect(fill = "transparent",colour = NA))
 heat
 
-ggsave(heat, bg = "transparent",
+ggsave(heat, #bg = "transparent",
        filename = "heat_2.png", 
        height = 5, width = 8.5)
 
-ggsave(heat, bg = "transparent",
+ggsave(heat, #bg = "transparent",
        filename = "heat_Unfiltered.png", 
        height = 9, width = 11)
 
diff --git a/inst/extdata/birdData.rds b/inst/extdata/birdData.rds
diff --git a/inst/extdata/birdSites.rds b/inst/extdata/birdSites.rds
diff --git a/inst/extdata/leviData.rds b/inst/extdata/leviData.rds
diff --git a/inst/extdata/leviSites.rds b/inst/extdata/leviSites.rds
diff --git a/inst/shiny/server.R b/inst/shiny/server.R
@@ -21,39 +21,12 @@ endPointInfo$intended_target_family[endPointInfo$assay_component_endpoint_name %
                                       c("CLD_CYP1A1_24hr","CLD_CYP1A1_48hr","CLD_CYP1A1_6hr",
                                         "CLD_CYP1A2_24hr","CLD_CYP1A2_48hr","CLD_CYP1A2_6hr")] <- "dna binding"
 
-endPointInfo$intended_target_family_sub[endPointInfo$assay_component_endpoint_name %in% 
-                                      c("CLD_CYP1A1_24hr","CLD_CYP1A1_48hr","CLD_CYP1A1_6hr",
-                                        "CLD_CYP1A2_24hr","CLD_CYP1A2_48hr","CLD_CYP1A2_6hr")] <- "basic helix-loop-helix protein"
-
-endPointInfo$intended_target_official_symbol[endPointInfo$assay_component_endpoint_name %in% 
-                                          c("CLD_CYP1A1_24hr","CLD_CYP1A1_48hr","CLD_CYP1A1_6hr",
-                                            "CLD_CYP1A2_24hr","CLD_CYP1A2_48hr","CLD_CYP1A2_6hr")] <- "AhR"
-
 endPointInfo$intended_target_family[endPointInfo$assay_component_endpoint_name %in% 
                                       c("CLD_CYP2B6_24hr","CLD_CYP2B6_48hr","CLD_CYP2B6_6hr",
                                         "CLD_CYP3A4_24hr","CLD_CYP3A4_48hr","CLD_CYP3A4_6hr",
                                         "CLD_SULT2A_48hr","CLD_UGT1A1_48hr","NVS_NR_bER",
                                         "NVS_NR_bPR","NVS_NR_cAR")] <- "nuclear receptor"
 
-endPointInfo$intended_target_family_sub[endPointInfo$assay_component_endpoint_name %in% 
-                                      c("CLD_CYP2B6_24hr","CLD_CYP2B6_48hr","CLD_CYP2B6_6hr",
-                                        "CLD_CYP3A4_24hr","CLD_CYP3A4_48hr","CLD_CYP3A4_6hr",
-                                        "CLD_SULT2A_48hr","CLD_UGT1A1_48hr")] <- "non-steroidal"
-
-endPointInfo$intended_target_official_symbol[endPointInfo$assay_component_endpoint_name %in% 
-                                    c("CLD_CYP2B6_24hr","CLD_CYP2B6_48hr","CLD_CYP2B6_6hr",
-                                      "CLD_SULT2A_48hr")] <- "NR1I3" 
-
-endPointInfo$intended_target_official_symbol[endPointInfo$assay_component_endpoint_name %in% 
-                                       c("CLD_CYP3A4_24hr","CLD_CYP3A4_48hr","CLD_CYP3A4_6hr",
-                                         "CLD_SULT2A_48hr")] <- "NR1I2" 
-
-endPointInfo$intended_target_family[endPointInfo$assay_component_endpoint_name %in% 
-                                      c("NVS_NR_bER", "NVS_NR_bPR","NVS_NR_cAR")] <- "steroidal"
-
-endPointInfo$intended_target_official_symbol[endPointInfo$assay_component_endpoint_name %in% 
-                                               c("NVS_NR_bER", "NVS_NR_bPR","NVS_NR_cAR")] <- c("ESR","PGR","AR")
-
 endPointInfo$intended_target_family[endPointInfo$assay_component_endpoint_name %in% 
                                       c("Tanguay_ZF_120hpf_ActivityScore",
                                         "Tanguay_ZF_120hpf_AXIS_up",
@@ -73,7 +46,6 @@ endPointInfo$intended_target_family[endPointInfo$assay_component_endpoint_name %
                                         "Tanguay_ZF_120hpf_TRUN_up",
                                         "Tanguay_ZF_120hpf_YSE_up")] <- "zebrafish"
 
-
 choicesPerGroup <- apply(endPointInfo, 2, function(x) length(unique(x[!is.na(x)])))
 choicesPerGroup <- which(choicesPerGroup > 6 & choicesPerGroup < 100)
 
@@ -88,17 +60,26 @@ cleanUpNames[grep("Gpcr",cleanUpNames)] <- "GPCR"
 endPointInfo$intended_target_family <- cleanUpNames
 
 sitesOrdered <- c("StLouis","Pigeon","Nemadji","WhiteWI","Bad","Montreal","PresqueIsle",
-                  "Ontonagon","Sturgeon","Tahquamenon","Manistique","Escanaba","Ford","Cheboygan2","Indian",
-                  "Menominee","Peshtigo","Oconto","Fox","Manistee","Manitowoc","PereMarquette","Sheboygan",
-                  "WhiteMI","Muskegon","MilwaukeeMouth","GrandMI","Kalamazoo2","PawPaw",
-                  "StJoseph","IndianaHC","Burns","ThunderBay","AuSable","Rifle",
+                  "Ontonagon","Sturgeon","Tahquamenon",
+                  "Burns","IndianaHC","StJoseph","PawPaw",        
+                  "Kalamazoo2","Kalamazoo","GrandMI","MilwaukeeMouth","Muskegon",      
+                  "WhiteMI","Sheboygan","PereMarquette","Manitowoc",    
+                  "Manistee","Fox","Oconto","Peshtigo",      
+                  "Menominee","Indian","Cheboygan2","Cheboygan","Ford",         
+                  "Escanaba","Manistique",
+                  "ThunderBay","AuSable","Rifle",
                   "Saginaw","Saginaw2","BlackMI","Clinton","Rouge","HuronMI","Raisin","Maumee",
                   "Portage","Sandusky","HuronOH","Vermilion","BlackOH","Rocky","Cuyahoga","GrandOH",
                   "Cattaraugus","Tonawanda","Genesee","Oswego","BlackNY","Oswegatchie","Grass","Raquette","StRegis")
 
 pathToApp <- system.file("extdata", package="toxEval")
 
 stationINFO <- readRDS(file.path(pathToApp,"sitesOWC.rds"))
+stationINFO$Lake[stationINFO$shortName == "BlackMI"] <- "Lake Huron"
+stationINFO$Lake[stationINFO$shortName == "HuronMI"] <- "Lake Erie"
+stationINFO$Lake[stationINFO$shortName == "ClintonDP"] <- "Lake Erie"
+stationINFO$Lake[stationINFO$shortName == "Clinton"] <- "Lake Erie"
+
 summaryFile <- readRDS(file.path(pathToApp,"summary.rds"))
 endPoint <- readRDS(file.path(pathToApp,"endPoint.rds"))
 
@@ -107,16 +88,15 @@ df2016 <- readRDS(file.path(pathToApp,"df2016.rds"))
 choicesPerGroup <- apply(endPointInfo[,-1], 2, function(x) length(unique(x)))
 groupChoices <- paste0(names(choicesPerGroup)," (",choicesPerGroup,")")
 
-initAssay <-  c("ATG","NVS","OT","TOX21","CEETOX",
-                "CLD","TANGUAY","NHEERL_PADILLA",
-                "NCCT_SIMMONS","ACEA")
+initAssay <- c("ATG","NVS","OT","TOX21","CEETOX", "APR", #"APR"?,"BSK"?
+            "CLD","TANGUAY","NHEERL_PADILLA",
+            "NCCT_SIMMONS","ACEA")
 
 # flags <- unique(AC50gain$flags[!is.na(AC50gain$flags)])
 # flags <- unique(unlist(strsplit(flags, "\\|")))
-flags <- c("Borderline active","Only highest conc above baseline, active" ,      
-           "Only one conc above baseline, active",                              
-           "Gain AC50 < lowest conc & loss AC50 < mean conc",
-           "Biochemical assay with < 50% efficacy")
+flags <- c("Noisy data",
+           "Only one conc above baseline, active",
+           "Hit-call potentially confounded by overfitting")
 
 flagsALL <- c("Borderline active","Only highest conc above baseline, active" ,      
            "Only one conc above baseline, active","Noisy data",                                 
@@ -173,6 +153,10 @@ shinyServer(function(input, output,session) {
     } else if(input$data == "App State"){
       leviSites <- readRDS(file.path(pathToApp,"leviSites.rds"))
       choices = c("All",leviSites$shortName)
+
+    } else if(input$data == "Birds"){
+      birdSites <- readRDS(file.path(pathToApp,"birdSites.rds"))
+      choices = c("All",birdSites$shortName)
     } else {
       choices =  c("All","2016 GLRI SP sites",summaryFile$site)
     }
@@ -333,6 +317,9 @@ shinyServer(function(input, output,session) {
       chemicalSummary <- readRDS(file.path(pathToApp,"leviData.rds"))
       stationINFO <<- readRDS(file.path(pathToApp,"leviSites.rds"))
 
+    } else if (input$data == "Birds"){
+      chemicalSummary <- readRDS(file.path(pathToApp,"birdData.rds"))
+      stationINFO <<- readRDS(file.path(pathToApp,"birdSites.rds"))      
     }
 
     chemicalSummary <- chemicalSummary %>%

diff --git a/inst/shiny/ui.R b/inst/shiny/ui.R
@@ -22,39 +22,12 @@ endPointInfo$intended_target_family[endPointInfo$assay_component_endpoint_name %
                                       c("CLD_CYP1A1_24hr","CLD_CYP1A1_48hr","CLD_CYP1A1_6hr",
                                         "CLD_CYP1A2_24hr","CLD_CYP1A2_48hr","CLD_CYP1A2_6hr")] <- "dna binding"
 
-endPointInfo$intended_target_family_sub[endPointInfo$assay_component_endpoint_name %in% 
-                                          c("CLD_CYP1A1_24hr","CLD_CYP1A1_48hr","CLD_CYP1A1_6hr",
-                                            "CLD_CYP1A2_24hr","CLD_CYP1A2_48hr","CLD_CYP1A2_6hr")] <- "basic helix-loop-helix protein"
-
-endPointInfo$intended_target_official_symbol[endPointInfo$assay_component_endpoint_name %in% 
-                                               c("CLD_CYP1A1_24hr","CLD_CYP1A1_48hr","CLD_CYP1A1_6hr",
-                                                 "CLD_CYP1A2_24hr","CLD_CYP1A2_48hr","CLD_CYP1A2_6hr")] <- "AhR"
-
 endPointInfo$intended_target_family[endPointInfo$assay_component_endpoint_name %in% 
                                       c("CLD_CYP2B6_24hr","CLD_CYP2B6_48hr","CLD_CYP2B6_6hr",
                                         "CLD_CYP3A4_24hr","CLD_CYP3A4_48hr","CLD_CYP3A4_6hr",
                                         "CLD_SULT2A_48hr","CLD_UGT1A1_48hr","NVS_NR_bER",
                                         "NVS_NR_bPR","NVS_NR_cAR")] <- "nuclear receptor"
 
-endPointInfo$intended_target_family_sub[endPointInfo$assay_component_endpoint_name %in% 
-                                          c("CLD_CYP2B6_24hr","CLD_CYP2B6_48hr","CLD_CYP2B6_6hr",
-                                            "CLD_CYP3A4_24hr","CLD_CYP3A4_48hr","CLD_CYP3A4_6hr",
-                                            "CLD_SULT2A_48hr","CLD_UGT1A1_48hr")] <- "non-steroidal"
-
-endPointInfo$intended_target_official_symbol[endPointInfo$assay_component_endpoint_name %in% 
-                                               c("CLD_CYP2B6_24hr","CLD_CYP2B6_48hr","CLD_CYP2B6_6hr",
-                                                 "CLD_SULT2A_48hr")] <- "NR1I3" 
-
-endPointInfo$intended_target_official_symbol[endPointInfo$assay_component_endpoint_name %in% 
-                                               c("CLD_CYP3A4_24hr","CLD_CYP3A4_48hr","CLD_CYP3A4_6hr",
-                                                 "CLD_SULT2A_48hr")] <- "NR1I2" 
-
-endPointInfo$intended_target_family[endPointInfo$assay_component_endpoint_name %in% 
-                                      c("NVS_NR_bER", "NVS_NR_bPR","NVS_NR_cAR")] <- "steroidal"
-
-endPointInfo$intended_target_official_symbol[endPointInfo$assay_component_endpoint_name %in% 
-                                               c("NVS_NR_bER", "NVS_NR_bPR","NVS_NR_cAR")] <- c("ESR","PGR","AR")
-
 endPointInfo$intended_target_family[endPointInfo$assay_component_endpoint_name %in% 
                                       c("Tanguay_ZF_120hpf_ActivityScore",
                                         "Tanguay_ZF_120hpf_AXIS_up",
@@ -74,8 +47,6 @@ endPointInfo$intended_target_family[endPointInfo$assay_component_endpoint_name %
                                         "Tanguay_ZF_120hpf_TRUN_up",
                                         "Tanguay_ZF_120hpf_YSE_up")] <- "zebrafish"
 
-
-
 choicesPerGroup <- apply(endPointInfo, 2, function(x) length(unique(x[!is.na(x)])))
 choicesPerGroup <- which(choicesPerGroup > 6 & choicesPerGroup < 100)
 
@@ -110,10 +81,9 @@ selChoices <- df$orderNames
 
 # flags <- unique(AC50gain$flags[!is.na(AC50gain$flags)])
 # flags <- unique(unlist(strsplit(flags, "\\|")))
-flags <- c("Borderline active","Only highest conc above baseline, active" ,      
-           "Only one conc above baseline, active",                              
-           "Gain AC50 < lowest conc & loss AC50 < mean conc",
-           "Biochemical assay with < 50% efficacy")
+flags <- c("Noisy data",
+           "Only one conc above baseline, active",
+           "Hit-call potentially confounded by overfitting")
 
 flagsALL <- c("Borderline active","Only highest conc above baseline, active" ,      
               "Only one conc above baseline, active","Noisy data",                                 
@@ -130,7 +100,8 @@ sidebar <- dashboardSidebar(
                                  "Duluth",
                                  "NPS",
                                  "TSHP",
-                                 "App State"),
+                                 "App State",
+                                 "Birds"),
                                  # ,"Detection Limits"),
                      selected = "Water Sample", multiple = FALSE),
    radioButtons("radioMaxGroup", label = "",
@@ -159,9 +130,9 @@ sidebar <- dashboardSidebar(
                                  "NHEERL_PADILLA"="NHEERL_PADILLA",
                                  "NCCT_SIMMONS"="NCCT_SIMMONS",
                                  "ACEA Biosciences" = "ACEA"),
-              selected=c("APR","ATG","NVS","OT","TOX21","CEETOX",
-                         "CLD","TANGUAY","NHEERL_PADILLA",
-                         "NCCT_SIMMONS","ACEA")),
+              selected= c("ATG","NVS","OT","TOX21","CEETOX", "APR", 
+                            "CLD","TANGUAY","NHEERL_PADILLA",
+                            "NCCT_SIMMONS","ACEA")),
               actionButton("pickAssay", label="Switch Assays")),
    menuItem("Annotation", icon = icon("th"), tabName = "annotation",
             selectInput("groupCol", label = "Annotation (# Groups)",