-
Notifications
You must be signed in to change notification settings - Fork 0
/
GC_calculation.R
69 lines (61 loc) · 1.78 KB
/
GC_calculation.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#!/usr/bin/env Rscript
# Calculate GC content (%) for 31-mers
# Directory holding the per-library k-mer tables (tab-separated, with header).
path <- "/projects/dumont-lab/uma/kmerplots"

# Empty template of the combined table. rbind() drops zero-row arguments, so
# this only determines the result when no input table contributes any rows.
GC <- data.frame(
  KMER = character(),
  COUNT = integer(),
  SAT = factor(),
  HDIST = factor(),
  STRAIN = factor(),
  LIBRARY = factor(),
  POPULATION = factor(),
  TAXONOMY = factor(),
  CON.SCR = factor(),
  K = factor(),
  GC = factor()
)

# Percentage of G/C bases among the first `k` characters of each k-mer.
#
# kmers: character (or factor) vector of k-mer sequences.
# k:     number of leading characters inspected; also the denominator.
# Returns a numeric vector the same length as `kmers` (empty in, empty out).
# Only upper-case "G" and "C" are counted.
# Stops if any k-mer is NA or shorter than `k`.
gc_percent <- function(kmers, k = 31) {
  kmers <- as.character(kmers)
  if (anyNA(kmers) || any(nchar(kmers) < k)) {
    stop("every k-mer must be at least ", k, " characters long",
         call. = FALSE)
  }
  leading <- substr(kmers, 1, k)
  # Deleting everything that is not G or C leaves exactly the G/C bases.
  gc_count <- nchar(gsub("[^GC]", "", leading))
  (gc_count / k) * 100
}

# Read one k-mer table, keep the rows whose HDIST equals 0 and add a numeric
# GC column computed from the first column (the k-mer sequence).
#
# fname: path to a tab-separated table with a header row and an HDIST column.
# k:     k-mer length passed on to gc_percent().
# Returns the filtered data frame with the extra GC column; it has zero rows
# when the table contains no HDIST == 0 entries.
read_h0_with_gc <- function(fname, k = 31) {
  tab <- read.table(fname, header = TRUE, sep = "\t")
  # Guard against NA: a bare `tab$HDIST == "0"` would yield all-NA rows.
  keep <- !is.na(tab$HDIST) & tab$HDIST == "0"
  tab <- tab[keep, , drop = FALSE]
  # Assign by name rather than by a hard-coded column position.
  tab$GC <- gc_percent(tab[[1]], k)
  tab
}

# Process every file in `dir` whose name matches the glob pattern `glob`.
# Returns a list with one data frame per file, in list.files() order.
gc_tables_for <- function(dir, glob, k = 31) {
  fs <- list.files(dir, pattern = glob2rx(glob))
  lapply(file.path(dir, fs), read_h0_with_gc, k = k)
}

# Minor-satellite tables first, then major-satellite tables, so the row order
# of the output is the same as when the files were appended one at a time.
tables <- c(
  gc_tables_for(path, "*.k31.minorsatellite.txt"),
  gc_tables_for(path, "*.k31.majorsatellite.txt")
)

# Bind everything in a single call instead of growing GC inside a loop.
GC <- do.call(rbind, c(list(GC), tables))

# Space-separated output without header, row names or quoting.
write.table(GC, file = 'h0.GC.k31.txt', row.names = FALSE, quote = FALSE,
            col.names = FALSE)