Merge pull request #2 from karel-brinda/devel

NanoSim-H 1.1.0.0
karel-brinda · May 10, 2017 · 8aae817 · 8aae817
2 parents 08f5e43 + 238d6d3
commit 8aae817
Show file tree

Hide file tree

Showing 105 changed files with 34,324 additions and 3,376 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,4 +1,6 @@
 ecoli_simulation
+simulated_*
+index.html
 *.pyc
 *.bak
 

diff --git a/.travis.yml b/.travis.yml
@@ -21,8 +21,6 @@ addons:
     - gcc-4.8
     - r-base
     - r-base-dev
-    #- zlib1g-dev
-    #- libbz2-dev
 before_install:
 - wget http://last.cbrc.jp/last-833.zip
 - unzip last-*.zip

diff --git a/COPYRIGHT b/COPYRIGHT
diff --git a/LICENSE.txt b/LICENSE.txt
@@ -1,5 +1,6 @@
-NanoSim
+NanoSim-H - a simulator of Oxford Nanopore reads.
 Copyright 2016 Chen Yang
+Copyright 2017 Karel Brinda
 
 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by

diff --git a/README.rst b/README.rst
diff --git a/increment_version.py b/increment_version.py
@@ -0,0 +1,13 @@
+#! /usr/bin/env python3
+
+vfn="nanosimh/version.py"
+
+exec(open(vfn).read())
+
+numbers=VERSION.split(".")
+numbers[-1]=str(int(numbers[-1])+1)
+
+version=".".join(numbers)
+
+with open(vfn,"w") as f:
+	f.write('VERSION="{}"'.format(version))
diff --git a/nanosimh/__init__.py b/nanosimh/__init__.py
@@ -1,3 +1,13 @@
+"""
+@copyright 2016 Chen Yang
+@copyright 2017 Karel Brinda
+
+Created by Chen Yang <[email protected]> (NanoSim)
+Forked and modified by Karel Brinda <[email protected]> (NanoSim-H)
+
+License: GPLv3
+"""
+
 from .simulate import *
 from .train import *
 from .mixed_models import *

diff --git a/nanosimh/besthit_to_histogram.py b/nanosimh/besthit_to_histogram.py
@@ -1,7 +1,18 @@
 #!/usr/bin/env python
 
+"""
+@copyright 2016 Chen Yang
+@copyright 2017 Karel Brinda
+
+Created by Chen Yang <[email protected]> (NanoSim)
+Forked and modified by Karel Brinda <[email protected]> (NanoSim-H)
+
+License: GPLv3
+"""
+
 from __future__ import with_statement
 import numpy
+import os
 
 def add_dict(error, dic):
 	if error not in dic:
@@ -27,14 +38,14 @@ def add_match(prev, succ, match_list):
 	match_list[prev][succ] += 1
 
 
-def hist(outfile):
-	out_match = open(outfile + "_match.hist", 'w')
-	out_mis = open(outfile + "_mis.hist", 'w')
-	out_ins = open(outfile + "_ins.hist", 'w')
-	out_del = open(outfile + "_del.hist", 'w')
-	out1 = open(outfile + "_error_markov_model", 'w')
-	out2 = open(outfile + "_match_markov_model", 'w')
-	out3 = open(outfile + "_first_match.hist", 'w')
+def hist(model_dir):
+	out_match = open(os.path.join(model_dir,"match.hist"), 'w')
+	out_mis = open(os.path.join(model_dir,"mis.hist"), 'w')
+	out_ins = open(os.path.join(model_dir,"ins.hist"), 'w')
+	out_del = open(os.path.join(model_dir,"del.hist"), 'w')
+	out1 = open(os.path.join(model_dir,"error_markov_model"), 'w')
+	out2 = open(os.path.join(model_dir,"match_markov_model"), 'w')
+	out3 = open(os.path.join(model_dir,"first_match.hist"), 'w')
 
 	dic_match = {}
 	dic_first_match = {}
@@ -63,7 +74,7 @@ def hist(outfile):
 		dic_ins[x] = 0
 		dic_del[x] = 0
 
-	with open(outfile + "_besthit.maf", 'r') as f:
+	with open(os.path.join(model_dir,"besthit.maf"), 'r') as f:
 		for line in f:
 			prev_match = 0
 			prev_error = ""

diff --git a/nanosimh/get_besthit.py b/nanosimh/get_besthit.py
@@ -1,11 +1,22 @@
 #!/usr/bin/env python
 
+"""
+@copyright 2016 Chen Yang
+@copyright 2017 Karel Brinda
+
+Created by Chen Yang <[email protected]> (NanoSim)
+Forked and modified by Karel Brinda <[email protected]> (NanoSim-H)
+
+License: GPLv3
+"""
+
 from __future__ import with_statement
 
+import os
 
-def besthit_and_unaligned(infile, outmaf, outfile):
+def besthit_and_unaligned(infile, outmaf, model_dir):
 	align_dict = {}
-	out1 = open(outfile + "_besthit.maf", 'w')
+	out1 = open(os.path.join(model_dir, "besthit.maf"), 'w')
 	unaligned_dict = {}
 
 	with open(outmaf, 'r') as f:

diff --git a/nanosimh/head_align_tail_dist.py b/nanosimh/head_align_tail_dist.py
@@ -1,22 +1,23 @@
 #!/usr/bin/env python
+
 """
-Written by Chen Yang on Mar 25th, 2015
-To get the length of head, aligned, and tail regions of an alignment.
+@copyright 2016 Chen Yang
+@copyright 2017 Karel Brinda
+
+Created by Chen Yang <[email protected]> (NanoSim)
+Forked and modified by Karel Brinda <[email protected]> (NanoSim-H)
 
-Major change in Apr 22nd
+License: GPLv3
 
-Updated in Nov 25th
+To get the length of head, aligned, and tail regions of an alignment.
 """
 
+
 from __future__ import with_statement
 import sys
 import getopt
 import numpy
-
-try:
-	from six.moves import range
-except ImportError:
-	pass
+import os
 
 
 def flex_bins(num_of_bins, ratio_dict, num_of_reads):
@@ -60,28 +61,18 @@ def flex_bins(num_of_bins, ratio_dict, num_of_reads):
 	return ratio_bins
 
 
-def head_align_tail(outfile, num_of_bins):
-	out1 = open(outfile + '_aligned_length_ecdf', 'w')
-	out2 = open(outfile + '_aligned_reads_ecdf', 'w')
-	out3 = open(outfile + '_ht_ratio', 'w')
-	out4 = open(outfile + "_align_ratio", 'w')
-
-	'''
-	out5 = open(outfile + "_total.txt", 'w')
-	out6 = open(outfile + "_middle.txt", 'w')
-	out7 = open(outfile + "_head.txt", 'w')
-	out8 = open(outfile + "_middle_ref.txt", 'w')
-	out9 = open(outfile + "_ht.txt", 'w')
-	out10 = open(outfile + "_ratio.txt", 'w')
-	out11 = open(outfile + "_tail.txt", 'w')
-	'''
+def head_align_tail(model_dir, num_of_bins):
+	out1 = open(os.path.join(model_dir,'aligned_length_ecdf'), 'w')
+	out2 = open(os.path.join(model_dir,'aligned_reads_ecdf'), 'w')
+	out3 = open(os.path.join(model_dir,'ht_ratio'), 'w')
+	out4 = open(os.path.join(model_dir,'align_ratio'), 'w')
 
 	aligned = []
 	total = []
 	ht_ratio = {}
 	align_ratio = {}
 
-	besthit_out = outfile + "_besthit.maf"
+	besthit_out = os.path.join(model_dir, "besthit.maf")
 	with open(besthit_out, 'r') as f:
 		for line in f:
 			ref = line.strip().split()

diff --git a/nanosimh/misc.py b/nanosimh/misc.py
@@ -1,3 +1,14 @@
+"""
+@copyright 2016 Chen Yang
+@copyright 2017 Karel Brinda
+
+Created by Chen Yang <[email protected]> (NanoSim)
+Forked and modified by Karel Brinda <[email protected]> (NanoSim-H)
+
+License: GPLv3
+"""
+
+
 import os
 
 def assert_file_exists(fn,test_nonempty=False):

diff --git a/nanosimh/mixed_models.py b/nanosimh/mixed_models.py
@@ -1,32 +1,39 @@
 #!/usr/bin/env python
+
 """
-Created on Apr 28th by Chen Yang
+@copyright 2016 Chen Yang
+@copyright 2017 Karel Brinda
+
+Created by Chen Yang <[email protected]> (NanoSim)
+Forked and modified by Karel Brinda <[email protected]> (NanoSim-H)
+
+License: GPLv3
 
 This script is used to generate random numbers following certain mixed distribution models
 """
 
-import numpy as np
 import math
+import numpy
 
 # numpy.random.geometric generates positive integers, starting from 1,
 # whereas rgeom in R generates values starting from 0
 
 
 def pois_geom(lam, prob, weight):
-	tmp_rand = np.random.random()
+	tmp_rand = numpy.random.random()
 	if tmp_rand < weight:
-		value = np.random.poisson(lam) + 1
+		value = numpy.random.poisson(lam) + 1
 	else:
-		value = np.random.geometric(prob)
+		value = numpy.random.geometric(prob)
 	return value
 
 
 def wei_geom(lam, k, prob, weight):
-	tmp_rand = np.random.random()
+	tmp_rand = numpy.random.random()
 	if tmp_rand < weight:
-		value = int(round(math.ceil(lam * np.random.weibull(k))))
+		value = int(round(math.ceil(lam * numpy.random.weibull(k))))
 	else:
-		value = np.random.geometric(prob) - 1
+		value = numpy.random.geometric(prob) - 1
 
 	if value == 0:
 		value = 1

diff --git a/nanosimh/model_fitting.R b/nanosimh/model_fitting.R
@@ -1,3 +1,14 @@
+#############################################################################
+# @copyright 2016 Chen Yang                                                 #
+# @copyright 2017 Karel Brinda                                              #
+#                                                                           #
+# Created by Chen Yang <[email protected]> (NanoSim)                           #
+# Forked and modified by Karel Brinda <[email protected]> (NanoSim-H) #
+#                                                                           #
+# License: GPL                                                              #
+#############################################################################
+
+
 library(stats4)
 
 # Ensure that the results will be reproducible
@@ -8,9 +19,9 @@ set.seed(1)
 args <- commandArgs(TRUE)
 eval(parse(text=args[[1]]))
 # prefix <- "R9/1D/test"
-mis_file <- paste(prefix, "_mis.hist", sep="")
-ins_file <- paste(prefix, "_ins.hist", sep="")
-del_file <- paste(prefix, "_del.hist", sep="")
+mis_file <- paste(prefix, "/mis.hist", sep="")
+ins_file <- paste(prefix, "/ins.hist", sep="")
+del_file <- paste(prefix, "/del.hist", sep="")
 
 mis <- read.delim(mis_file)
 mis.freq <- mis$Mismatches.
@@ -196,5 +207,5 @@ model_fit.table <- data.frame(Type = c("mismatch", "insertion", "deletion"),
                               prob = c(mis.fit[2], ins.fit[3], del.fit[3]),
                               weight = c(mis.fit[3], ins.fit[4], del.fit[4]))
 
-out_file <- paste(prefix, "_model_profile", sep="")
+out_file <- paste(prefix, "/model_profile", sep="")
 write.table(model_fit.table, out_file, row.names = FALSE, quote = FALSE, sep = "\t")