-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #2 from karel-brinda/devel
NanoSim-H 1.1.0.0
- Loading branch information
Showing
105 changed files
with
34,324 additions
and
3,376 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,6 @@ | ||
ecoli_simulation | ||
simulated_* | ||
index.html | ||
*.pyc | ||
*.bak | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
#! /usr/bin/env python3
"""Bump the last component of the package version.

Reads ``VERSION`` from the version file, adds one to its final
dot-separated component and rewrites the file in place, e.g.
``1.1.0.0`` -> ``1.1.0.1``.
"""

# Path of the version file, relative to the current working directory.
vfn = "nanosimh/version.py"


def _read_version(path):
    """Return the ``VERSION`` string defined by the Python file at *path*.

    Raises:
        OSError: if the file cannot be opened.
        KeyError: if the file does not define ``VERSION``.
    """
    namespace = {}
    with open(path) as f:
        # NOTE(review): exec runs the version file as code. That is acceptable
        # only because the file is part of this repository; never point this
        # at untrusted input. A private namespace keeps it out of our globals.
        exec(f.read(), namespace)
    return namespace["VERSION"]


def _bump_last(version):
    """Return *version* with its last dot-separated number incremented.

    Raises:
        ValueError: if the last component is not an integer.
    """
    numbers = version.split(".")
    numbers[-1] = str(int(numbers[-1]) + 1)
    return ".".join(numbers)


def main():
    """Rewrite the version file with the bumped version number."""
    version = _bump_last(_read_version(vfn))
    with open(vfn, "w") as f:
        f.write('VERSION="{}"'.format(version))


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,13 @@ | ||
""" | ||
@copyright 2016 Chen Yang | ||
@copyright 2017 Karel Brinda | ||
Created by Chen Yang <[email protected]> (NanoSim) | ||
Forked and modified by Karel Brinda <[email protected]> (NanoSim-H) | ||
License: GPLv3 | ||
""" | ||
|
||
from .simulate import * | ||
from .train import * | ||
from .mixed_models import * | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,18 @@ | ||
#!/usr/bin/env python | ||
|
||
""" | ||
@copyright 2016 Chen Yang | ||
@copyright 2017 Karel Brinda | ||
Created by Chen Yang <[email protected]> (NanoSim) | ||
Forked and modified by Karel Brinda <[email protected]> (NanoSim-H) | ||
License: GPLv3 | ||
""" | ||
|
||
from __future__ import with_statement | ||
import numpy | ||
import os | ||
|
||
def add_dict(error, dic): | ||
if error not in dic: | ||
|
@@ -27,14 +38,14 @@ def add_match(prev, succ, match_list): | |
match_list[prev][succ] += 1 | ||
|
||
|
||
def hist(outfile): | ||
out_match = open(outfile + "_match.hist", 'w') | ||
out_mis = open(outfile + "_mis.hist", 'w') | ||
out_ins = open(outfile + "_ins.hist", 'w') | ||
out_del = open(outfile + "_del.hist", 'w') | ||
out1 = open(outfile + "_error_markov_model", 'w') | ||
out2 = open(outfile + "_match_markov_model", 'w') | ||
out3 = open(outfile + "_first_match.hist", 'w') | ||
def hist(model_dir): | ||
out_match = open(os.path.join(model_dir,"match.hist"), 'w') | ||
out_mis = open(os.path.join(model_dir,"mis.hist"), 'w') | ||
out_ins = open(os.path.join(model_dir,"ins.hist"), 'w') | ||
out_del = open(os.path.join(model_dir,"del.hist"), 'w') | ||
out1 = open(os.path.join(model_dir,"error_markov_model"), 'w') | ||
out2 = open(os.path.join(model_dir,"match_markov_model"), 'w') | ||
out3 = open(os.path.join(model_dir,"first_match.hist"), 'w') | ||
|
||
dic_match = {} | ||
dic_first_match = {} | ||
|
@@ -63,7 +74,7 @@ def hist(outfile): | |
dic_ins[x] = 0 | ||
dic_del[x] = 0 | ||
|
||
with open(outfile + "_besthit.maf", 'r') as f: | ||
with open(os.path.join(model_dir,"besthit.maf"), 'r') as f: | ||
for line in f: | ||
prev_match = 0 | ||
prev_error = "" | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,11 +1,22 @@ | ||
#!/usr/bin/env python | ||
|
||
""" | ||
@copyright 2016 Chen Yang | ||
@copyright 2017 Karel Brinda | ||
Created by Chen Yang <[email protected]> (NanoSim) | ||
Forked and modified by Karel Brinda <[email protected]> (NanoSim-H) | ||
License: GPLv3 | ||
""" | ||
|
||
from __future__ import with_statement | ||
|
||
import os | ||
|
||
def besthit_and_unaligned(infile, outmaf, outfile): | ||
def besthit_and_unaligned(infile, outmaf, model_dir): | ||
align_dict = {} | ||
out1 = open(outfile + "_besthit.maf", 'w') | ||
out1 = open(os.path.join(model_dir, "besthit.maf"), 'w') | ||
unaligned_dict = {} | ||
|
||
with open(outmaf, 'r') as f: | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,22 +1,23 @@ | ||
#!/usr/bin/env python | ||
|
||
""" | ||
Written by Chen Yang on Mar 25th, 2015 | ||
To get the length of head, aligned, and tail regions of an alignment. | ||
@copyright 2016 Chen Yang | ||
@copyright 2017 Karel Brinda | ||
Created by Chen Yang <[email protected]> (NanoSim) | ||
Forked and modified by Karel Brinda <[email protected]> (NanoSim-H) | ||
Major change in Apr 22nd | ||
License: GPLv3 | ||
Updated in Nov 25th | ||
To get the length of head, aligned, and tail regions of an alignment. | ||
""" | ||
|
||
|
||
from __future__ import with_statement | ||
import sys | ||
import getopt | ||
import numpy | ||
|
||
try: | ||
from six.moves import range | ||
except ImportError: | ||
pass | ||
import os | ||
|
||
|
||
def flex_bins(num_of_bins, ratio_dict, num_of_reads): | ||
|
@@ -60,28 +61,18 @@ def flex_bins(num_of_bins, ratio_dict, num_of_reads): | |
return ratio_bins | ||
|
||
|
||
def head_align_tail(outfile, num_of_bins): | ||
out1 = open(outfile + '_aligned_length_ecdf', 'w') | ||
out2 = open(outfile + '_aligned_reads_ecdf', 'w') | ||
out3 = open(outfile + '_ht_ratio', 'w') | ||
out4 = open(outfile + "_align_ratio", 'w') | ||
|
||
''' | ||
out5 = open(outfile + "_total.txt", 'w') | ||
out6 = open(outfile + "_middle.txt", 'w') | ||
out7 = open(outfile + "_head.txt", 'w') | ||
out8 = open(outfile + "_middle_ref.txt", 'w') | ||
out9 = open(outfile + "_ht.txt", 'w') | ||
out10 = open(outfile + "_ratio.txt", 'w') | ||
out11 = open(outfile + "_tail.txt", 'w') | ||
''' | ||
def head_align_tail(model_dir, num_of_bins): | ||
out1 = open(os.path.join(model_dir,'aligned_length_ecdf'), 'w') | ||
out2 = open(os.path.join(model_dir,'aligned_reads_ecdf'), 'w') | ||
out3 = open(os.path.join(model_dir,'ht_ratio'), 'w') | ||
out4 = open(os.path.join(model_dir,'align_ratio'), 'w') | ||
|
||
aligned = [] | ||
total = [] | ||
ht_ratio = {} | ||
align_ratio = {} | ||
|
||
besthit_out = outfile + "_besthit.maf" | ||
besthit_out = os.path.join(model_dir, "besthit.maf") | ||
with open(besthit_out, 'r') as f: | ||
for line in f: | ||
ref = line.strip().split() | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,14 @@ | ||
""" | ||
@copyright 2016 Chen Yang | ||
@copyright 2017 Karel Brinda | ||
Created by Chen Yang <[email protected]> (NanoSim) | ||
Forked and modified by Karel Brinda <[email protected]> (NanoSim-H) | ||
License: GPLv3 | ||
""" | ||
|
||
|
||
import os | ||
|
||
def assert_file_exists(fn,test_nonempty=False): | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,32 +1,39 @@ | ||
#!/usr/bin/env python | ||
|
||
""" | ||
Created on Apr 28th by Chen Yang | ||
@copyright 2016 Chen Yang | ||
@copyright 2017 Karel Brinda | ||
Created by Chen Yang <[email protected]> (NanoSim) | ||
Forked and modified by Karel Brinda <[email protected]> (NanoSim-H) | ||
License: GPLv3 | ||
This script is used to generate random numbers following certain mixed distribution models | ||
""" | ||
|
||
import numpy as np | ||
import math | ||
import numpy | ||
|
||
# numpy.random.geometric generate positive integers, starting from 1 | ||
# the rgeom in R generate values starting from 0 | ||
|
||
|
||
def pois_geom(lam, prob, weight):
    """Draw one random value from a Poisson/geometric mixture.

    A uniform random number in [0, 1) selects the component: if it is below
    *weight* the value is ``numpy.random.poisson(lam) + 1``, otherwise it is
    ``numpy.random.geometric(prob)``.

    Args:
        lam: rate passed to ``numpy.random.poisson``.
        prob: success probability passed to ``numpy.random.geometric``.
        weight: probability of choosing the Poisson component.

    Returns:
        The sampled value. Both components yield values >= 1: the Poisson
        draw is shifted by one, and numpy's geometric starts at 1.
    """
    if numpy.random.random() < weight:
        # Shift by one so the Poisson component never returns 0.
        return numpy.random.poisson(lam) + 1
    return numpy.random.geometric(prob)
|
||
|
||
def wei_geom(lam, k, prob, weight): | ||
tmp_rand = np.random.random() | ||
tmp_rand = numpy.random.random() | ||
if tmp_rand < weight: | ||
value = int(round(math.ceil(lam * np.random.weibull(k)))) | ||
value = int(round(math.ceil(lam * numpy.random.weibull(k)))) | ||
else: | ||
value = np.random.geometric(prob) - 1 | ||
value = numpy.random.geometric(prob) - 1 | ||
|
||
if value == 0: | ||
value = 1 | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,14 @@ | ||
############################################################################# | ||
# @copyright 2016 Chen Yang # | ||
# @copyright 2017 Karel Brinda # | ||
# # | ||
# Created by Chen Yang <[email protected]> (NanoSim) # | ||
# Forked and modified by Karel Brinda <[email protected]> (NanoSim-H) # | ||
# # | ||
# License: GPL # | ||
############################################################################# | ||
|
||
|
||
library(stats4) | ||
|
||
# Ensure that the results will be reproducible | ||
|
@@ -8,9 +19,9 @@ set.seed(1) | |
args <- commandArgs(TRUE) | ||
eval(parse(text=args[[1]])) | ||
# prefix <- "R9/1D/test" | ||
mis_file <- paste(prefix, "_mis.hist", sep="") | ||
ins_file <- paste(prefix, "_ins.hist", sep="") | ||
del_file <- paste(prefix, "_del.hist", sep="") | ||
mis_file <- paste(prefix, "/mis.hist", sep="") | ||
ins_file <- paste(prefix, "/ins.hist", sep="") | ||
del_file <- paste(prefix, "/del.hist", sep="") | ||
|
||
mis <- read.delim(mis_file) | ||
mis.freq <- mis$Mismatches. | ||
|
@@ -196,5 +207,5 @@ model_fit.table <- data.frame(Type = c("mismatch", "insertion", "deletion"), | |
prob = c(mis.fit[2], ins.fit[3], del.fit[3]), | ||
weight = c(mis.fit[3], ins.fit[4], del.fit[4])) | ||
|
||
out_file <- paste(prefix, "_model_profile", sep="") | ||
out_file <- paste(prefix, "/model_profile", sep="") | ||
write.table(model_fit.table, out_file, row.names = FALSE, quote = FALSE, sep = "\t") |
Oops, something went wrong.