diff --git a/DESCRIPTION b/DESCRIPTION index 84587242..c5aaa016 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: toxEval Type: Package Title: ToxCast Evaluations -Version: 0.2.7 -Date: 2016-12-21 +Version: 0.2.8 +Date: 2017-01-04 Authors@R: c( person("Steven", "Corsi", role = c("aut"), email = "srcorsi@usgs.gov"), person("Laura", "DeCicco", role = c("aut","cre"), diff --git a/biologicalBP.R b/biologicalBP.R new file mode 100644 index 00000000..844fde11 --- /dev/null +++ b/biologicalBP.R @@ -0,0 +1,106 @@ +library(toxEval) +library(dplyr) +library(data.table) +library(tidyr) +library(ggplot2) +library(grid) +library(gridExtra) + +source("getDataReady.R") + +graphData <- chemicalSummary %>% + left_join(select(endPointInfo, endPoint=assay_component_name, intended_target_family), + by=c("endPoint")) %>% + select(-category) %>% + rename(category = choices) %>% + group_by(site,date,category) %>% + summarise(sumEAR=sum(EAR)) %>% + data.frame() %>% + group_by(site, category) %>% + summarise(meanEAR=max(sumEAR)) %>% + data.frame() %>% + mutate(category=as.character(category)) %>% + filter(!is.na(category)) %>% + filter(meanEAR > 0) + +orderColsBy <- graphData %>% + group_by(category) %>% + summarise(median = median(meanEAR[meanEAR != 0])) %>% + arrange(median) + +orderedLevels <- orderColsBy$category + +if(any(is.na(orderColsBy$median))){ + orderedLevels <- c(orderColsBy$category[is.na(orderColsBy$median)], + orderColsBy$category[!is.na(orderColsBy$median)]) +} + +orderNames <- names(table(select(endPointInfo, intended_target_family))) + +orderedLevels <- c(orderNames[!(orderNames %in% orderedLevels)],orderedLevels) + +graphData$category <- factor(as.character(graphData$category), levels=orderedLevels) + +countNonZero <- graphData %>% + group_by(category) %>% + summarise(nonZero = as.character(sum(meanEAR>0))) %>% + data.frame() + +bioPlot <- ggplot(graphData)+ + scale_y_log10("Maximum EAR Per Site",labels=fancyNumbers)+ + geom_boxplot(aes(x=category, y=meanEAR),lwd=0.1,outlier.size=1, fill = "steelblue") + + coord_flip() + + theme_bw() + + xlab("") + + theme(plot.background = element_rect(fill = "transparent",colour = NA), + axis.text.y = element_text(size=10, color = "black", vjust = 0.2), + axis.text.x = element_text(size=10, color = "black", vjust = 0, margin = margin(-0.5,0,0,0)), + axis.title = element_text(size=10)) + +xmin <<- 10^(ggplot_build(bioPlot)$layout$panel_ranges[[1]]$x.range[1]) + +bioPlot <- bioPlot + + geom_text(data=countNonZero, aes(x=category, y=xmin,label=nonZero),size=3) + +bioPlot + +ggsave(bioPlot, #bg = "transparent", + filename = "bioPlot.png", + height = 4, width = 5) + +graphData <- graphData %>% + mutate(site = factor(site, levels = sitesOrdered[sitesOrdered %in% siteLimits$shortName])) + +graphData <- left_join(graphData, select(siteLimits, shortName, Lake), by=c("site" = "shortName")) +graphData$Lake <- gsub(" River","", graphData$Lake) +graphData$Lake[graphData$site == "StRegis"] <- "Lake Ontario" +graphData$Lake <- factor(gsub("Lake ","",graphData$Lake), c("Superior", "Michigan","Huron","Erie", "Ontario")) + +heat <- ggplot(data = graphData) + + geom_tile(aes(x = site, y=category, fill=meanEAR)) + + theme_bw() + + theme(axis.text.x = element_text( angle = 90,vjust=0.5,hjust = 1)) + + ylab("") + + xlab("") + + labs(fill="Maximum EAR") + + scale_fill_gradient( guide = "legend", + trans = 'log', + low = "white", high = "steelblue", + breaks=c(0.00001,0.0001,0.001,0.01,0.1,1,5), + na.value = 'transparent',labels=fancyNumbers2) + + facet_grid(. ~ Lake,scales="free", space="free") + + theme(strip.text.y = element_text(angle=0, hjust=0, size=7), + strip.text.x = element_text(size = 7), + strip.background = element_rect(fill="transparent", colour = NA), + axis.text = element_text(size=7), + panel.spacing = unit(0.05, "lines"), + panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), + legend.text = element_text(size=8), + legend.title = element_text(size =8), + plot.background = element_rect(fill = "transparent",colour = NA)) +heat + +ggsave(heat, #bg = "transparent", + filename = "heat_Bio.png", + height = 7, width = 11) diff --git a/boxPlotWQ_ACC_simplified.R b/boxPlotWQ_ACC_simplified.R new file mode 100644 index 00000000..bb6d74c3 --- /dev/null +++ b/boxPlotWQ_ACC_simplified.R @@ -0,0 +1,252 @@ +library(toxEval) +library(dplyr) +library(data.table) +library(tidyr) +library(ggplot2) +library(grid) +library(gridExtra) + +source("getDataReady.R") + +########################################################################### +# WQ boxplot +filePath <- file.path(pathToApp, "waterSamples.RData") +load(file=filePath) +waterSamples$site["USGS-04157005" == waterSamples$site] <- "USGS-04157000" +valColumns <- grep("valueToUse", names(waterSamples)) +qualColumns <- grep("qualifier", names(waterSamples)) +waterData <- waterSamples[,valColumns] +waterData[waterSamples[,qualColumns] == "<"] <- 0 +wData <- cbind(waterSamples[,1:2],waterData) + +wDataLong <- gather(wData, pCode, measuredValue, -ActivityStartDateGiven, -site) %>% + rename(date = ActivityStartDateGiven) %>% + filter(!is.na(measuredValue)) %>% + mutate(pCode = gsub("valueToUse_", replacement = "", pCode)) %>% + left_join(select(pCodeInfo, parameter_cd, casrn, class,parameter_nm, + # EEF_avg_in.vitro,EEF_max_in.vitro_or_in.vivo, + AqT_EPA_acute,AqT_EPA_chronic, + AqT_other_acute,AqT_other_chronic), + by=c("pCode" = "parameter_cd")) %>% + gather(endPoint, value, + # EEF_avg_in.vitro,EEF_max_in.vitro_or_in.vivo, + AqT_EPA_acute,AqT_EPA_chronic, + AqT_other_acute,AqT_other_chronic) %>% + mutate(EAR = measuredValue/value) %>% + filter(!is.na(EAR)) + +waterSamplePCodes <- unique(wDataLong$pCode) + +toxCastChems <- gather(ACC, endPoint, ACC, -casn, -chnm, -flags) %>% + filter(!is.na(ACC)) %>% + left_join(pCodeInfo[pCodeInfo$parameter_cd %in% waterSamplePCodes,c("casrn", "parameter_units", "mlWt")], + by= c("casn"="casrn")) %>% + filter(!is.na(parameter_units)) %>% + select(casn, chnm) %>% + distinct() + +wDataLong <- wDataLong %>% + left_join(toxCastChems, by=c("casrn"="casn")) %>% + filter(!is.na(chnm)) %>% + mutate(parameter_nm = chnm) %>% + select(-chnm) + +wDataLong$parameter_nm[wDataLong$parameter_nm == "4-(1,1,3,3-Tetramethylbutyl)phenol"] <- "4-tert-Octylphenol" + +graphData.wq <- wDataLong %>% + group_by(site,date,parameter_nm,class) %>% + summarise(sumEAR=sum(EAR)) %>% + data.frame() %>% + group_by(site, parameter_nm,class) %>% + summarise(meanEAR=max(sumEAR)) %>% + data.frame() + +################################################## +# Regular toxEval stuff: +graphData <- graphData %>% + mutate(category = as.character(category), + guideline = "ToxCast") + +graphData.full_WQ <- mutate(graphData.wq, class=as.character(class)) %>% + rename(category = parameter_nm) %>% + mutate(guideline = "Traditional") + +graphData.full_WQ$class[graphData.full_WQ$class == "Detergent Metabolites"] <- "Detergent" + +wDataLong_EQ <- gather(wData, pCode, measuredValue, -ActivityStartDateGiven, -site) %>% + rename(date = ActivityStartDateGiven) %>% + filter(!is.na(measuredValue)) %>% + mutate(pCode = gsub("valueToUse_", replacement = "", pCode)) %>% + left_join(select(pCodeInfo, parameter_cd, casrn, class,parameter_nm, + EEF_max_in.vitro_or_in.vivo), + by=c("pCode" = "parameter_cd")) %>% + gather(endPoint, value, EEF_max_in.vitro_or_in.vivo) %>% + mutate(EAR = measuredValue*value/0.7) %>% + filter(!is.na(EAR)) + +wDataLong_EQ$parameter_nm[wDataLong_EQ$parameter_nm == "4-(1,1,3,3-Tetramethylbutyl)phenol"] <- "4-tert-Octylphenol" + +waterSamplePCodes_EQ <- unique(wDataLong_EQ$pCode) + +toxCastChems_EQ <- gather(ACC, endPoint, ACC, -casn, -chnm, -flags) %>% + filter(!is.na(ACC)) %>% + left_join(pCodeInfo[pCodeInfo$parameter_cd %in% waterSamplePCodes_EQ,c("casrn", "parameter_units", "mlWt")], + by= c("casn"="casrn")) %>% + filter(!is.na(parameter_units)) %>% + select(casn, chnm) %>% + distinct() + +toxCastChems_EQ$chnm[toxCastChems_EQ$chnm == "4-(1,1,3,3-Tetramethylbutyl)phenol"] <- "4-tert-Octylphenol" + +wDataLong_EQ <- wDataLong_EQ %>% + left_join(toxCastChems_EQ, by=c("casrn"="casn")) %>% + filter(!is.na(chnm)) %>% + mutate(parameter_nm = chnm) %>% + select(-chnm) + +graphData.eq <- wDataLong_EQ %>% + group_by(site,date,parameter_nm,class) %>% + summarise(sumEAR=sum(EAR)) %>% + data.frame() %>% + group_by(site, parameter_nm,class) %>% + summarise(meanEAR=max(sumEAR)) %>% + data.frame() %>% + mutate(guideline = "Traditional") %>% + rename(category = parameter_nm) + +subTox <- filter(graphData, category %in% graphData.eq$category) %>% + mutate(otherThing = "EEQ") + +subTox$category[subTox$category == "4-(1,1,3,3-Tetramethylbutyl)phenol"] <- "4-tert-Octylphenol" + +EQ <- graphData.eq %>% + mutate(otherThing = "EEQ") + +EQ$category[EQ$category == "4-(1,1,3,3-Tetramethylbutyl)phenol"] <- "4-tert-Octylphenol" + +subToxWQ <- graphData %>% + mutate(otherThing = "Water Quality Guidelines") + +WQ <- graphData.wq %>% + rename(category = parameter_nm) %>% + mutate(otherThing = "Water Quality Guidelines") %>% + mutate(guideline = "Traditional") + +fullFULL <- bind_rows(subTox, EQ, subToxWQ, WQ) %>% + mutate(class = factor(class, levels=rev(as.character(orderClass$class)))) + +fullData <- bind_rows(graphData.full_WQ, graphData) + +fullData$class[fullData$class == "Detergent Metabolites"] <- "Detergent" + +x <- graphData[is.na(graphData$category),] + +orderChem <- graphData %>%#fullData %>% #not fullFULL...or just graphData....needs just tox and WQ + group_by(category,class) %>% + summarise(median = quantile(meanEAR[meanEAR != 0],0.5)) %>% + data.frame() %>% + mutate(class = factor(class, levels=orderClass$class)) %>% + arrange(class, median) + +orderedLevels <- as.character(orderChem$category) +orderedLevels <- orderedLevels[!is.na(orderedLevels)] +orderedLevels <- c(orderedLevels[1:2], "Cumene", + orderedLevels[3:4],"Bromoform", + orderedLevels[5:length(orderedLevels)]) + +fullFULL <- fullFULL %>% + mutate(guideline = factor(as.character(guideline), levels=c("ToxCast","Traditional")), + otherThing = factor(as.character(otherThing), levels = c("Water Quality Guidelines","EEQ"))) %>% + mutate(class = factor(class, levels=rev(orderClass$class))) %>% + mutate(category = factor(category, levels=orderedLevels)) + +fullFULL$class[fullFULL$category == "4-Nonylphenol"] <- "Detergent" + +fullFULL$class[which(is.na(fullFULL$class))] <- "Other" + +cbValues <- c("#DCDA4B","#999999","#00FFFF","#CEA226","#CC79A7","#4E26CE", + "#FFFF00","#78C15A","#79AEAE","#FF0000","#00FF00","#B1611D", + "#FFA500") + +textData <- data.frame(guideline = factor(c(rep("Traditional", 2), + rep("ToxCast", 2),rep("Traditional", 2)), levels = levels(fullFULL$guideline)), + otherThing = factor(c("Water Quality Guidelines","EEQ", + "Water Quality Guidelines","EEQ", + "Water Quality Guidelines","EEQ"), levels = levels(fullFULL$otherThing)), + category = factor(c("2-Methylnaphthalene","1,4-Dichlorobenzene", + "Bisphenol A","Bisphenol A", + "Bisphenol A","Bisphenol A"), levels = levels(fullFULL$category)), + textExplain = c("Water Quality Guidelines Quotients", + "Estradiol Equivalent Quotients", + "A","B","C","D"), + y = c(0.5,0.5,10,10,100,100)) + +countNonZero <- fullFULL %>% + select(site, category,guideline,otherThing, meanEAR) %>% + group_by(site, category,guideline,otherThing) %>% + summarise(meanEAR = mean(meanEAR, na.rm=TRUE)) %>% + group_by(category,guideline,otherThing) %>% + summarise(nonZero = as.character(sum(meanEAR>0))) %>% + data.frame() %>% + select(category, otherThing, nonZero) %>% + distinct() %>% + mutate(guideline = factor(c("ToxCast"), levels = levels(fullFULL$guideline)), + otherThing = factor(otherThing, levels = levels(fullFULL$otherThing)), + category = factor(category, levels = levels(fullFULL$category))) + +countNonZero <- countNonZero[!duplicated(countNonZero[,1:2]),] + +astrictData <- countNonZero %>% + mutate(guideline = factor(c("Traditional"), levels = levels(fullFULL$guideline))) %>% + filter(otherThing == "Water Quality Guidelines") %>% + mutate(nonZero = "*") %>% + filter(!(category %in% unique(WQ$category))) + +levels(fullFULL$guideline) <- c("ToxCast\nMaximum EAR Per Site", + "Traditional*\nMaximum Quotient Per Site") +levels(countNonZero$guideline) <- c("ToxCast\nMaximum EAR Per Site", + "Traditional*\nMaximum Quotient Per Site") +levels(astrictData$guideline) <- c("ToxCast\nMaximum EAR Per Site", + "Traditional*\nMaximum Quotient Per Site") +levels(textData$guideline) <- c("ToxCast\nMaximum EAR Per Site", + "Traditional*\nMaximum Quotient Per Site") + +toxPlot_All <- ggplot(data=fullFULL) + + scale_y_log10(labels=fancyNumbers) + + geom_boxplot(aes(x=category, y=meanEAR, fill=class), + lwd=0.1,outlier.size=0) + + facet_grid(otherThing ~ guideline, scales = "free", space = "free") + + theme_bw() + + scale_x_discrete(drop=TRUE) + + coord_flip() + + theme(axis.text = element_text( color = "black"), + axis.text.y = element_text(size=7), + axis.title=element_blank(), + panel.background = element_blank(), + plot.background = element_rect(fill = "transparent",colour = NA), + strip.background = element_rect(fill = "transparent",colour = NA), + strip.text.y = element_blank()) + + guides(fill=guide_legend(ncol=6)) + + theme(legend.position="bottom", + legend.justification = "left", + legend.background = element_rect(fill = "transparent", colour = "transparent"), + legend.title=element_blank(), + legend.text = element_text(size=8), + legend.key.height = unit(1,"line")) + + scale_fill_manual(values = cbValues, drop=FALSE) + +ymin <- 10^-6 +ymax <- ggplot_build(toxPlot_All)$layout$panel_ranges[[1]]$y.range[2] + +toxPlot_All <- toxPlot_All + + geom_text(data=countNonZero, aes(x=category,label=nonZero, y=ymin), size=2.5) + + geom_text(data = textData, aes(x=category, label=textExplain, y=y), + size = 3) + + geom_text(data = astrictData, aes(x=category, label=nonZero, y=0.00002), + size=5, vjust = 0.70) + +toxPlot_All + +ggsave(toxPlot_All, bg = "transparent", + filename = "allPanels.png", + height = 10, width = 7.75) diff --git a/getDataReady.R b/getDataReady.R new file mode 100644 index 00000000..62a99419 --- /dev/null +++ b/getDataReady.R @@ -0,0 +1,193 @@ +library(toxEval) +library(dplyr) +library(data.table) +library(tidyr) +library(ggplot2) +library(grid) +library(gridExtra) +library(stringi) + +pCodeInfo <- pCodeInfo + +pathToApp <- system.file("extdata", package="toxEval") +endPointInfo <- endPointInfo + +endPointInfo <- endPointInfo[!(endPointInfo$assay_source_name == "ATG" & endPointInfo$signal_direction == "loss"),] +endPointInfo <- endPointInfo[!(endPointInfo$assay_source_name == "NVS" & endPointInfo$signal_direction == "gain"),] +endPointInfo <- endPointInfo[endPointInfo$assay_component_endpoint_name != "TOX21_p53_BLA_p3_ratio",] +endPointInfo <- endPointInfo[endPointInfo$assay_component_endpoint_name != "TOX21_p53_BLA_p2_viability",] + +endPointInfo$intended_target_family[endPointInfo$assay_component_endpoint_name %in% + c("CLD_CYP1A1_24hr","CLD_CYP1A1_48hr","CLD_CYP1A1_6hr", + "CLD_CYP1A2_24hr","CLD_CYP1A2_48hr","CLD_CYP1A2_6hr")] <- "dna binding" + +endPointInfo$intended_target_family[endPointInfo$assay_component_endpoint_name %in% + c("CLD_CYP2B6_24hr","CLD_CYP2B6_48hr","CLD_CYP2B6_6hr", + "CLD_CYP3A4_24hr","CLD_CYP3A4_48hr","CLD_CYP3A4_6hr", + "CLD_SULT2A_48hr","CLD_UGT1A1_48hr","NVS_NR_bER", + "NVS_NR_bPR","NVS_NR_cAR")] <- "nuclear receptor" + +endPointInfo$intended_target_family[endPointInfo$assay_component_endpoint_name %in% + c("Tanguay_ZF_120hpf_ActivityScore", + "Tanguay_ZF_120hpf_AXIS_up", + "Tanguay_ZF_120hpf_BRAI_up", + "Tanguay_ZF_120hpf_CFIN_up", + "Tanguay_ZF_120hpf_EYE_up", + "Tanguay_ZF_120hpf_JAW_up", + "Tanguay_ZF_120hpf_MORT_up", + "Tanguay_ZF_120hpf_OTIC_up", + "Tanguay_ZF_120hpf_PE_up", + "Tanguay_ZF_120hpf_PFIN_up", + "Tanguay_ZF_120hpf_PIG_up", + "Tanguay_ZF_120hpf_SNOU_up", + "Tanguay_ZF_120hpf_SOMI_up", + "Tanguay_ZF_120hpf_SWIM_up", + "Tanguay_ZF_120hpf_TR_up", + "Tanguay_ZF_120hpf_TRUN_up", + "Tanguay_ZF_120hpf_YSE_up")] <- "zebrafish" + +assays <- c("ATG","NVS","OT","TOX21","CEETOX", "APR", #"APR"?,"BSK"? + "CLD","TANGUAY","NHEERL_PADILLA", + "NCCT_SIMMONS","ACEA") + +cleanUpNames <- endPointInfo$intended_target_family +cleanUpNames <- stri_trans_totitle(cleanUpNames) +cleanUpNames[grep("Dna",cleanUpNames)] <- "DNA Binding" +cleanUpNames[grep("Cyp",cleanUpNames)] <- "CYP" +cleanUpNames[grep("Gpcr",cleanUpNames)] <- "GPCR" +endPointInfo$intended_target_family <- cleanUpNames + +ep <- select(endPointInfo, + endPoint = assay_component_endpoint_name, + groupCol = intended_target_family, + assaysFull = assay_source_name) %>% + filter(assaysFull %in% assays) %>% + filter(!(groupCol %in% c("Background Measurement"))) %>% #,"cell morphology", "cell cycle"))) %>% + filter(!is.na(groupCol)) + +chemicalSummary.orig <- readRDS(file.path(pathToApp,"chemicalSummary_ACC.rds")) +chemicalSummary.orig$chnm[chemicalSummary.orig$chnm == "4-(1,1,3,3-Tetramethylbutyl)phenol"] <- "4-tert-Octylphenol" + +stationINFO <- readRDS(file.path(pathToApp,"sitesOWC.rds")) + +chemicalSummary <- chemicalSummary.orig %>% + filter(endPoint %in% ep$endPoint) %>% + data.table() %>% + left_join(data.table(ep), by="endPoint") %>% + data.frame() %>% + select_("EAR","chnm","class","date","groupCol","site","endPoint","casrn") %>% + rename(choices = groupCol) %>% + mutate(category=chnm) %>% + left_join(stationINFO[,c("fullSiteID","shortName")], by=c("site"="fullSiteID")) %>% + select(-site) %>% + rename(site=shortName) + +chemicalSummary$site[chemicalSummary$site == "Saginaw2"] <- "Saginaw" +chemicalSummary$site[chemicalSummary$site == "Cheboygan2"] <- "Cheboygan" +chemicalSummary$site[chemicalSummary$site == "Kalamazoo2"] <- "Kalamazoo" + +stationINFO$shortName[stationINFO$shortName == "Kalamazoo2"] <- "Kalamazoo" +stationINFO$shortName[stationINFO$shortName == "Cheboygan2"] <- "Cheboygan" + + +flagsShort <- c("Borderline", "OnlyHighest", + "GainAC50", "Biochemical") +# flagsShort <- c("Borderline", "OnlyHighest", "OneAbove","Noisy", +# "HitCall", "GainAC50", "Biochemical") +for(i in flagsShort){ + take.out.flags <- flagDF[!flagDF[[i]],c("casn","endPoint")] + + chemicalSummary <- right_join(chemicalSummary, take.out.flags, + by=c("casrn"="casn", "endPoint"="endPoint")) %>% + filter(!is.na(chnm)) +} + +chemicalSummary <- left_join(chemicalSummary, select(flagDF, casn,endPoint, flags), + by=c("casrn"="casn", "endPoint"="endPoint")) + +graphData <- chemicalSummary %>% + group_by(site,date,category,class) %>% + summarise(sumEAR=sum(EAR)) %>% + data.frame() %>% + group_by(site, category,class) %>% + summarise(meanEAR=max(sumEAR)) %>% + data.frame() + +graphData$class[graphData$class == "Human Drug, Non Prescription"] <- "Human Drug" +graphData$class[graphData$class == "Antimicrobial Disinfectant"] <- "Antimicrobial" +graphData$class[graphData$class == "Detergent Metabolites"] <- "Detergent" + +orderClass <- graphData %>% + group_by(class,category) %>% + summarise(median = median(meanEAR[meanEAR != 0])) %>% + data.frame() %>% + arrange(desc(median)) %>% + filter(!duplicated(class)) %>% + arrange(median) + +orderChem <- graphData %>% + group_by(category,class) %>% + summarise(median = quantile(meanEAR[meanEAR != 0],0.5)) %>% + data.frame() %>% + mutate(class = factor(class, levels=orderClass$class)) %>% + arrange(class, median) + +orderClass <- mutate(orderClass, class = factor(class, levels=levels(orderChem$class))) + +orderedLevels <- orderChem$category#[!is.na(orderChem$median)] + + +fancyNumbers2 <- function(n){ + textReturn <- signif(n,digits = 2) + textReturn <- as.character(textReturn) + textReturn[length(textReturn)] <- paste(">",textReturn[length(textReturn)]) + textReturn[1] <- paste("<",textReturn[1]) + return(textReturn) +} + +fancyNumbers <- function(n){ + nNoNA <- n[!is.na(n)] + x <-gsub(pattern = "1e",replacement = "10^",x = format(nNoNA, scientific = TRUE)) + exponents <- as.numeric(sapply(strsplit(x, "\\^"), function(j) j[2])) + base <- ifelse(exponents == 0, "1", ifelse(exponents == 1, "10","10^")) + exponents[exponents == 0 | exponents == 1] <- "" + textNums <- rep(NA, length(n)) + textNums[!is.na(n)] <- paste0(base,exponents) + + textReturn <- parse(text=textNums) + return(textReturn) +} + +stationINFO <- stationINFO + +stationINFO$Lake[stationINFO$shortName == "BlackMI"] <- "Lake Huron" +stationINFO$Lake[stationINFO$shortName == "HuronMI"] <- "Lake Erie" +stationINFO$Lake[stationINFO$shortName == "ClintonDP"] <- "Lake Erie" +stationINFO$Lake[stationINFO$shortName == "Clinton"] <- "Lake Erie" + + +sitesOrdered <- c("StLouis","Pigeon","Nemadji","WhiteWI","Bad","Montreal","PresqueIsle", + "Ontonagon","Sturgeon","Tahquamenon", + "Burns","IndianaHC","StJoseph","PawPaw", + "Kalamazoo2","Kalamazoo","GrandMI","MilwaukeeMouth","Muskegon", + "WhiteMI","Sheboygan","PereMarquette","Manitowoc", + "Manistee","Fox","Oconto","Peshtigo", + "Menominee","Indian","Cheboygan2","Cheboygan","Ford", + "Escanaba","Manistique", + "ThunderBay","AuSable","Rifle", + "Saginaw","Saginaw2","BlackMI","Clinton","Rouge","HuronMI","Raisin","Maumee", + "Portage","Sandusky","HuronOH","Vermilion","BlackOH","Rocky","Cuyahoga","GrandOH", + "Cattaraugus","Tonawanda","Genesee","Oswego","BlackNY","Oswegatchie","Grass","Raquette","StRegis") + +siteToFind <- unique(graphData$site) + +siteLimits <- stationINFO %>% + filter(shortName %in% unique(graphData$site)) + +siteLimits$Lake[siteLimits$Lake == "Detroit River and Lake St. Clair"] <- "Lake Michigan" +siteLimits$Lake[siteLimits$Lake == "St. Lawrence River"] <- "Lake Ontario" + +siteLimits <- siteLimits %>% + mutate(shortName = factor(shortName, levels=sitesOrdered[sitesOrdered %in% siteLimits$shortName])) + + diff --git a/heatMap.R b/heatMap.R new file mode 100644 index 00000000..67a1e632 --- /dev/null +++ b/heatMap.R @@ -0,0 +1,83 @@ +library(toxEval) +library(dplyr) +library(data.table) +library(tidyr) +library(ggplot2) +library(grid) +library(gridExtra) + +source("getDataReady.R") + +graphData <- graphData %>% + mutate(site = factor(site, levels = sitesOrdered[sitesOrdered %in% siteLimits$shortName])) %>% + filter(!is.na(category)) + +graphData$class <- factor(graphData$class, levels=levels(orderClass$class)) +graphData$category <- factor(graphData$category, levels=orderedLevels) + +siteLimits <- siteLimits %>% + mutate(shortName = factor(shortName, levels=sitesOrdered[sitesOrdered %in% siteLimits$shortName])) + +graphData <- left_join(graphData, select(siteLimits, shortName, Lake), by=c("site" = "shortName")) +graphData$Lake <- gsub(" River","", graphData$Lake) +graphData$Lake <- factor(gsub("Lake ","",graphData$Lake), c("Superior", "Michigan","Huron","Erie", "Ontario")) + +sitesToUse <- graphData %>% + group_by(site) %>% + summarize(toUse = any(meanEAR > 10^-3)) + +graphData_filtered <- graphData %>% + filter(site %in% sitesToUse$site[sitesToUse$toUse]) + +classToUse <- graphData_filtered %>% + group_by(class, site, category) %>% + summarize(toUse = any(meanEAR > 10^-3)) %>% + group_by(class) %>% + summarize(with10 = sum(toUse) >= 20, + countCats = length(unique(category))) %>% + filter(countCats > 2) + +graphData_filtered <- graphData_filtered %>% + filter(class %in% classToUse$class[classToUse$with10]) + +# graphData_filtered <- graphData %>% +# filter(!(Lake %in% c("Superior","Huron","Ontario","St. Lawrence"))) %>% +# filter(!is.na(category)) %>% +# filter(class %in% c("Plasticizer", "Herbicide","PAH", "Antioxidant")) + +# rmCat <- group_by(graphData_filtered, category) %>% +# summarize(allZero = all(meanEAR == 0)) + +heat <- ggplot(data = graphData_filtered) + + geom_tile(aes(x = site, y=category, fill=meanEAR)) + + theme_bw() + + theme(axis.text.x = element_text( angle = 90,vjust=0.5,hjust = 1)) + + ylab("") + + xlab("") + + labs(fill="Maximum EAR") + + scale_fill_gradient( guide = "legend", + trans = 'log', + low = "white", high = "steelblue", + breaks=c(0.00001,0.0001,0.001,0.01,0.1,1,5), + na.value = 'transparent',labels=fancyNumbers2) + + facet_grid(class ~ Lake,scales="free", space="free") + + theme(strip.text.y = element_text(angle=0, hjust=0, size=7), + strip.text.x = element_text(size = 7), + strip.background = element_rect(fill="transparent", colour = NA), + axis.text = element_text(size=7), + panel.spacing = unit(0.05, "lines"), + panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), + legend.text = element_text(size=8), + legend.title = element_text(size =8), + plot.background = element_rect(fill = "transparent",colour = NA)) +heat + +ggsave(heat, #bg = "transparent", + filename = "heat_2.png", + height = 5, width = 8.5) + +ggsave(heat, #bg = "transparent", + filename = "heat_Unfiltered.png", + height = 9, width = 11) + diff --git a/inst/extdata/birdData.rds b/inst/extdata/birdData.rds new file mode 100644 index 00000000..6f86ff94 Binary files /dev/null and b/inst/extdata/birdData.rds differ diff --git a/inst/extdata/birdSites.rds b/inst/extdata/birdSites.rds new file mode 100644 index 00000000..4270dc02 Binary files /dev/null and b/inst/extdata/birdSites.rds differ diff --git a/inst/extdata/leviData.rds b/inst/extdata/leviData.rds index b4b17170..b973b80d 100644 Binary files a/inst/extdata/leviData.rds and b/inst/extdata/leviData.rds differ diff --git a/inst/extdata/leviSites.rds b/inst/extdata/leviSites.rds index 72068886..b1f368d0 100644 Binary files a/inst/extdata/leviSites.rds and b/inst/extdata/leviSites.rds differ diff --git a/inst/shiny/server.R b/inst/shiny/server.R index dbf23e5a..95c9592b 100644 --- a/inst/shiny/server.R +++ b/inst/shiny/server.R @@ -21,39 +21,12 @@ endPointInfo$intended_target_family[endPointInfo$assay_component_endpoint_name % c("CLD_CYP1A1_24hr","CLD_CYP1A1_48hr","CLD_CYP1A1_6hr", "CLD_CYP1A2_24hr","CLD_CYP1A2_48hr","CLD_CYP1A2_6hr")] <- "dna binding" -endPointInfo$intended_target_family_sub[endPointInfo$assay_component_endpoint_name %in% - c("CLD_CYP1A1_24hr","CLD_CYP1A1_48hr","CLD_CYP1A1_6hr", - "CLD_CYP1A2_24hr","CLD_CYP1A2_48hr","CLD_CYP1A2_6hr")] <- "basic helix-loop-helix protein" - -endPointInfo$intended_target_official_symbol[endPointInfo$assay_component_endpoint_name %in% - c("CLD_CYP1A1_24hr","CLD_CYP1A1_48hr","CLD_CYP1A1_6hr", - "CLD_CYP1A2_24hr","CLD_CYP1A2_48hr","CLD_CYP1A2_6hr")] <- "AhR" - endPointInfo$intended_target_family[endPointInfo$assay_component_endpoint_name %in% c("CLD_CYP2B6_24hr","CLD_CYP2B6_48hr","CLD_CYP2B6_6hr", "CLD_CYP3A4_24hr","CLD_CYP3A4_48hr","CLD_CYP3A4_6hr", "CLD_SULT2A_48hr","CLD_UGT1A1_48hr","NVS_NR_bER", "NVS_NR_bPR","NVS_NR_cAR")] <- "nuclear receptor" -endPointInfo$intended_target_family_sub[endPointInfo$assay_component_endpoint_name %in% - c("CLD_CYP2B6_24hr","CLD_CYP2B6_48hr","CLD_CYP2B6_6hr", - "CLD_CYP3A4_24hr","CLD_CYP3A4_48hr","CLD_CYP3A4_6hr", - "CLD_SULT2A_48hr","CLD_UGT1A1_48hr")] <- "non-steroidal" - -endPointInfo$intended_target_official_symbol[endPointInfo$assay_component_endpoint_name %in% - c("CLD_CYP2B6_24hr","CLD_CYP2B6_48hr","CLD_CYP2B6_6hr", - "CLD_SULT2A_48hr")] <- "NR1I3" - -endPointInfo$intended_target_official_symbol[endPointInfo$assay_component_endpoint_name %in% - c("CLD_CYP3A4_24hr","CLD_CYP3A4_48hr","CLD_CYP3A4_6hr", - "CLD_SULT2A_48hr")] <- "NR1I2" - -endPointInfo$intended_target_family[endPointInfo$assay_component_endpoint_name %in% - c("NVS_NR_bER", "NVS_NR_bPR","NVS_NR_cAR")] <- "steroidal" - -endPointInfo$intended_target_official_symbol[endPointInfo$assay_component_endpoint_name %in% - c("NVS_NR_bER", "NVS_NR_bPR","NVS_NR_cAR")] <- c("ESR","PGR","AR") - endPointInfo$intended_target_family[endPointInfo$assay_component_endpoint_name %in% c("Tanguay_ZF_120hpf_ActivityScore", "Tanguay_ZF_120hpf_AXIS_up", @@ -73,7 +46,6 @@ endPointInfo$intended_target_family[endPointInfo$assay_component_endpoint_name % "Tanguay_ZF_120hpf_TRUN_up", "Tanguay_ZF_120hpf_YSE_up")] <- "zebrafish" - choicesPerGroup <- apply(endPointInfo, 2, function(x) length(unique(x[!is.na(x)]))) choicesPerGroup <- which(choicesPerGroup > 6 & choicesPerGroup < 100) @@ -88,10 +60,14 @@ cleanUpNames[grep("Gpcr",cleanUpNames)] <- "GPCR" endPointInfo$intended_target_family <- cleanUpNames sitesOrdered <- c("StLouis","Pigeon","Nemadji","WhiteWI","Bad","Montreal","PresqueIsle", - "Ontonagon","Sturgeon","Tahquamenon","Manistique","Escanaba","Ford","Cheboygan2","Indian", - "Menominee","Peshtigo","Oconto","Fox","Manistee","Manitowoc","PereMarquette","Sheboygan", - "WhiteMI","Muskegon","MilwaukeeMouth","GrandMI","Kalamazoo2","PawPaw", - "StJoseph","IndianaHC","Burns","ThunderBay","AuSable","Rifle", + "Ontonagon","Sturgeon","Tahquamenon", + "Burns","IndianaHC","StJoseph","PawPaw", + "Kalamazoo2","Kalamazoo","GrandMI","MilwaukeeMouth","Muskegon", + "WhiteMI","Sheboygan","PereMarquette","Manitowoc", + "Manistee","Fox","Oconto","Peshtigo", + "Menominee","Indian","Cheboygan2","Cheboygan","Ford", + "Escanaba","Manistique", + "ThunderBay","AuSable","Rifle", "Saginaw","Saginaw2","BlackMI","Clinton","Rouge","HuronMI","Raisin","Maumee", "Portage","Sandusky","HuronOH","Vermilion","BlackOH","Rocky","Cuyahoga","GrandOH", "Cattaraugus","Tonawanda","Genesee","Oswego","BlackNY","Oswegatchie","Grass","Raquette","StRegis") @@ -99,6 +75,11 @@ sitesOrdered <- c("StLouis","Pigeon","Nemadji","WhiteWI","Bad","Montreal","Presq pathToApp <- system.file("extdata", package="toxEval") stationINFO <- readRDS(file.path(pathToApp,"sitesOWC.rds")) +stationINFO$Lake[stationINFO$shortName == "BlackMI"] <- "Lake Huron" +stationINFO$Lake[stationINFO$shortName == "HuronMI"] <- "Lake Erie" +stationINFO$Lake[stationINFO$shortName == "ClintonDP"] <- "Lake Erie" +stationINFO$Lake[stationINFO$shortName == "Clinton"] <- "Lake Erie" + summaryFile <- readRDS(file.path(pathToApp,"summary.rds")) endPoint <- readRDS(file.path(pathToApp,"endPoint.rds")) @@ -107,16 +88,15 @@ df2016 <- readRDS(file.path(pathToApp,"df2016.rds")) choicesPerGroup <- apply(endPointInfo[,-1], 2, function(x) length(unique(x))) groupChoices <- paste0(names(choicesPerGroup)," (",choicesPerGroup,")") -initAssay <- c("ATG","NVS","OT","TOX21","CEETOX", - "CLD","TANGUAY","NHEERL_PADILLA", - "NCCT_SIMMONS","ACEA") +initAssay <- c("ATG","NVS","OT","TOX21","CEETOX", "APR", #"APR"?,"BSK"? + "CLD","TANGUAY","NHEERL_PADILLA", + "NCCT_SIMMONS","ACEA") # flags <- unique(AC50gain$flags[!is.na(AC50gain$flags)]) # flags <- unique(unlist(strsplit(flags, "\\|"))) -flags <- c("Borderline active","Only highest conc above baseline, active" , - "Only one conc above baseline, active", - "Gain AC50 < lowest conc & loss AC50 < mean conc", - "Biochemical assay with < 50% efficacy") +flags <- c("Noisy data", + "Only one conc above baseline, active", + "Hit-call potentially confounded by overfitting") flagsALL <- c("Borderline active","Only highest conc above baseline, active" , "Only one conc above baseline, active","Noisy data", @@ -173,6 +153,10 @@ shinyServer(function(input, output,session) { } else if(input$data == "App State"){ leviSites <- readRDS(file.path(pathToApp,"leviSites.rds")) choices = c("All",leviSites$shortName) + + } else if(input$data == "Birds"){ + birdSites <- readRDS(file.path(pathToApp,"birdSites.rds")) + choices = c("All",birdSites$shortName) } else { choices = c("All","2016 GLRI SP sites",summaryFile$site) } @@ -333,6 +317,9 @@ shinyServer(function(input, output,session) { chemicalSummary <- readRDS(file.path(pathToApp,"leviData.rds")) stationINFO <<- readRDS(file.path(pathToApp,"leviSites.rds")) + } else if (input$data == "Birds"){ + chemicalSummary <- readRDS(file.path(pathToApp,"birdData.rds")) + stationINFO <<- readRDS(file.path(pathToApp,"birdSites.rds")) } chemicalSummary <- chemicalSummary %>% diff --git a/inst/shiny/ui.R b/inst/shiny/ui.R index 81398620..7bb75e32 100644 --- a/inst/shiny/ui.R +++ b/inst/shiny/ui.R @@ -22,39 +22,12 @@ endPointInfo$intended_target_family[endPointInfo$assay_component_endpoint_name % c("CLD_CYP1A1_24hr","CLD_CYP1A1_48hr","CLD_CYP1A1_6hr", "CLD_CYP1A2_24hr","CLD_CYP1A2_48hr","CLD_CYP1A2_6hr")] <- "dna binding" -endPointInfo$intended_target_family_sub[endPointInfo$assay_component_endpoint_name %in% - c("CLD_CYP1A1_24hr","CLD_CYP1A1_48hr","CLD_CYP1A1_6hr", - "CLD_CYP1A2_24hr","CLD_CYP1A2_48hr","CLD_CYP1A2_6hr")] <- "basic helix-loop-helix protein" - -endPointInfo$intended_target_official_symbol[endPointInfo$assay_component_endpoint_name %in% - c("CLD_CYP1A1_24hr","CLD_CYP1A1_48hr","CLD_CYP1A1_6hr", - "CLD_CYP1A2_24hr","CLD_CYP1A2_48hr","CLD_CYP1A2_6hr")] <- "AhR" - endPointInfo$intended_target_family[endPointInfo$assay_component_endpoint_name %in% c("CLD_CYP2B6_24hr","CLD_CYP2B6_48hr","CLD_CYP2B6_6hr", "CLD_CYP3A4_24hr","CLD_CYP3A4_48hr","CLD_CYP3A4_6hr", "CLD_SULT2A_48hr","CLD_UGT1A1_48hr","NVS_NR_bER", "NVS_NR_bPR","NVS_NR_cAR")] <- "nuclear receptor" -endPointInfo$intended_target_family_sub[endPointInfo$assay_component_endpoint_name %in% - c("CLD_CYP2B6_24hr","CLD_CYP2B6_48hr","CLD_CYP2B6_6hr", - "CLD_CYP3A4_24hr","CLD_CYP3A4_48hr","CLD_CYP3A4_6hr", - "CLD_SULT2A_48hr","CLD_UGT1A1_48hr")] <- "non-steroidal" - -endPointInfo$intended_target_official_symbol[endPointInfo$assay_component_endpoint_name %in% - c("CLD_CYP2B6_24hr","CLD_CYP2B6_48hr","CLD_CYP2B6_6hr", - "CLD_SULT2A_48hr")] <- "NR1I3" - -endPointInfo$intended_target_official_symbol[endPointInfo$assay_component_endpoint_name %in% - c("CLD_CYP3A4_24hr","CLD_CYP3A4_48hr","CLD_CYP3A4_6hr", - "CLD_SULT2A_48hr")] <- "NR1I2" - -endPointInfo$intended_target_family[endPointInfo$assay_component_endpoint_name %in% - c("NVS_NR_bER", "NVS_NR_bPR","NVS_NR_cAR")] <- "steroidal" - -endPointInfo$intended_target_official_symbol[endPointInfo$assay_component_endpoint_name %in% - c("NVS_NR_bER", "NVS_NR_bPR","NVS_NR_cAR")] <- c("ESR","PGR","AR") - endPointInfo$intended_target_family[endPointInfo$assay_component_endpoint_name %in% c("Tanguay_ZF_120hpf_ActivityScore", "Tanguay_ZF_120hpf_AXIS_up", @@ -74,8 +47,6 @@ endPointInfo$intended_target_family[endPointInfo$assay_component_endpoint_name % "Tanguay_ZF_120hpf_TRUN_up", "Tanguay_ZF_120hpf_YSE_up")] <- "zebrafish" - - choicesPerGroup <- apply(endPointInfo, 2, function(x) length(unique(x[!is.na(x)]))) choicesPerGroup <- which(choicesPerGroup > 6 & choicesPerGroup < 100) @@ -110,10 +81,9 @@ selChoices <- df$orderNames # flags <- unique(AC50gain$flags[!is.na(AC50gain$flags)]) # flags <- unique(unlist(strsplit(flags, "\\|"))) -flags <- c("Borderline active","Only highest conc above baseline, active" , - "Only one conc above baseline, active", - "Gain AC50 < lowest conc & loss AC50 < mean conc", - "Biochemical assay with < 50% efficacy") +flags <- c("Noisy data", + "Only one conc above baseline, active", + "Hit-call potentially confounded by overfitting") flagsALL <- c("Borderline active","Only highest conc above baseline, active" , "Only one conc above baseline, active","Noisy data", @@ -130,7 +100,8 @@ sidebar <- dashboardSidebar( "Duluth", "NPS", "TSHP", - "App State"), + "App State", + "Birds"), # ,"Detection Limits"), selected = "Water Sample", multiple = FALSE), radioButtons("radioMaxGroup", label = "", @@ -159,9 +130,9 @@ sidebar <- dashboardSidebar( "NHEERL_PADILLA"="NHEERL_PADILLA", "NCCT_SIMMONS"="NCCT_SIMMONS", "ACEA Biosciences" = "ACEA"), - selected=c("APR","ATG","NVS","OT","TOX21","CEETOX", - "CLD","TANGUAY","NHEERL_PADILLA", - "NCCT_SIMMONS","ACEA")), + selected= c("ATG","NVS","OT","TOX21","CEETOX", "APR", + "CLD","TANGUAY","NHEERL_PADILLA", + "NCCT_SIMMONS","ACEA")), actionButton("pickAssay", label="Switch Assays")), menuItem("Annotation", icon = icon("th"), tabName = "annotation", selectInput("groupCol", label = "Annotation (# Groups)",