-
Notifications
You must be signed in to change notification settings - Fork 0
/
Primers_OrganelleContamination.r
91 lines (78 loc) · 4.12 KB
/
Primers_OrganelleContamination.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
#! /usr/bin/Rscript
# Plot graphics determining how well each primer set prevented chloroplast contamination
# Libraries
library(argparse)
library(dplyr)
library(ggplot2)
library(phyloseq)
library(tidyr)
options(stringsAsFactors=F)
# Command-line arguments
parser=ArgumentParser()
parser$add_argument("-i", "--infile", help="Rdata file containing the phyloseq object to analyze (from step 2a)")
parser$add_argument("-o", "--outprefix", help="Prefix for all output files")
parser$add_argument("-t", "--type", choices=c("extraction", "amplification"), default="extraction", help="Which experiment set this analysis belongs to")
args=parser$parse_args()
# setwd('/home/jgwall/Projects/Microbiomes/MicrobiomeMethodsDevelopment/CompareSampleExtractionAndAmplification_Mohsen_Cecelia/2020 03 Consolidated Pipeline/')
# args=parser$parse_args(c("-i", "TestPrimers/2_Analysis/2b_filtered_data.phyloseq.RDS", "-o", "99_tmp", "-t", "amplification"))
# Load data
cat("Loading phyloseq data\n")
source("StandardizeLabels.r")
mydata = standardize_labels(readRDS(args$infile), type=args$type)
# Pull data back out of Phyloseq object so can plot exactly what we want
otus = otu_table(mydata)
taxonomy = as.data.frame(tax_table(mydata))
key = sample_data(mydata)
# Sanity checks to make sure things line up properly
if ( ! identical(rownames(otus), rownames(taxonomy))){
stop("OTU names do not line up with taxonomy")
}
if ( ! identical(colnames(otus), rownames(key))){
stop("Sample names do not line up with sample key")
}
# Calculate fraction of chloroplast and mitochondria contamination in each
cat("Calculating fraction organnelar DNA\n")
is_chloroplast = taxonomy$Order == 'Chloroplast' # Chloroplasts are an order
is_mitochondria = taxonomy$Family == 'Mitochondria' # Mitochondria are a family
chloro_counts = colSums(otus[is_chloroplast,])
mito_counts = colSums(otus[is_mitochondria,])
total_counts = colSums(otus)
other_counts = total_counts - chloro_counts - mito_counts
# Make data frame for plotting the above
tally = data.frame(sample=colnames(otus), targets=other_counts/total_counts, sample.type = key$sample.type, treatment = key$treatment)
tally$organs = 1 - tally$targets
# Shorten labels for plotting
tally$treatment = as.character(tally$treatment)
tally$treatment[tally$treatment=='Universal'] = "Univ."
tally$treatment[tally$treatment=='Discriminating'] = "Disc."
# Split by sample type
plant_targets = droplevels(subset(tally, tally$sample.type %in% c("Maize Leaf", "Soybean Leaf")))
nonplant_targets = droplevels(subset(tally, tally$sample.type %in% c("Soil 1", "Soil 2", "Defined Community")))
# Plot actual graphic
plot_violins = function(mydata, mygroup=""){
set.seed(1) # To keep jitters the same each time
# Make plot
myplot = ggplot(mydata, mapping=aes(x=treatment, y=organs)) +
geom_violin(scale="width", fill='lightblue', size=0.25) + # Violin plots
geom_jitter(mapping=aes(color=sample.type), alpha=0.5) + # Scatter points of actual data (jittered to be easier to see)
labs(x="Primer Set", y="Fraction Organellar Reads") +
theme_bw() +
theme(
axis.title = element_text(size=11, face="bold"),
axis.text = element_text(size=8, face="bold"),
axis.text.x = element_text(angle=90, hjust=1, vjust=0.5),
legend.position = c(0.25, 0.82), # Legend in upper-left of plot
legend.title = element_text(size=6, face='bold'),
legend.text = element_text(size=5, face='bold'),
legend.margin = margin(3,3,3,3),
legend.box.background = element_rect(color='black'),
legend.spacing.y = unit(0, "pt"),
legend.key.size = unit(10, "pt") # adjusts the (hidden) background behind legend points; most important element for legend spacing
) +
labs(color="Sample Type")
# Save PNG and SVG
ggsave(myplot, file= paste(args$outprefix, mygroup, "png", sep="."), width=2.5, height = 2.6, units="in", dpi=300)
ggsave(myplot, file= paste(args$outprefix, mygroup, "svg", sep="."), width=2.5, height = 2.6, units="in", dpi=300)
}
plot_violins(plant_targets, "plant")
plot_violins(nonplant_targets, "nonplant")