Skip to content

Commit

Permalink
Merge pull request #2 from karel-brinda/devel
Browse files Browse the repository at this point in the history
NanoSim-H 1.1.0.0
  • Loading branch information
Karel Břinda authored May 10, 2017
2 parents 08f5e43 + 238d6d3 commit 8aae817
Show file tree
Hide file tree
Showing 105 changed files with 34,324 additions and 3,376 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
ecoli_simulation
simulated_*
index.html
*.pyc
*.bak

Expand Down
2 changes: 0 additions & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,6 @@ addons:
- gcc-4.8
- r-base
- r-base-dev
#- zlib1g-dev
#- libbz2-dev
before_install:
- wget http://last.cbrc.jp/last-833.zip
- unzip last-*.zip
Expand Down
26 changes: 0 additions & 26 deletions COPYRIGHT

This file was deleted.

3 changes: 2 additions & 1 deletion LICENSE.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
NanoSim
NanoSim-H - a simulator of Oxford Nanopore reads.
Copyright 2016 Chen Yang
Copyright 2017 Karel Brinda

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
Expand Down
310 changes: 187 additions & 123 deletions README.rst

Large diffs are not rendered by default.

13 changes: 13 additions & 0 deletions increment_version.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#! /usr/bin/env python3

# Bump the last dot-separated component of the package version stored in
# the version file (for example 1.1.0.0 -> 1.1.0.1) and write it back.

vfn = "nanosimh/version.py"


def bump_version(version):
    """Return *version* with its last dot-separated component incremented.

    Args:
        version: Version string made of dot-separated fields whose last
            field is an integer, e.g. ``"1.1.0.0"``.

    Returns:
        The new version string, e.g. ``"1.1.0.1"``.

    Raises:
        ValueError: If the last field cannot be parsed as an integer.
    """
    numbers = version.split(".")
    numbers[-1] = str(int(numbers[-1]) + 1)
    return ".".join(numbers)


def main():
    """Read ``VERSION`` from the version file, increment it, and rewrite the file."""
    # Read through a context manager so the handle is closed before the
    # same path is reopened for writing below.
    with open(vfn) as f:
        source = f.read()

    # NOTE(review): exec() runs the version file as Python code. This is
    # tolerable only because the file belongs to this repository; it is
    # evaluated in a private namespace so it cannot clobber script names.
    namespace = {}
    exec(source, namespace)

    version = bump_version(namespace["VERSION"])

    # The file is rewritten as a single assignment without a trailing
    # newline, exactly as before.
    with open(vfn, "w") as f:
        f.write('VERSION="{}"'.format(version))


if __name__ == "__main__":
    main()
10 changes: 10 additions & 0 deletions nanosimh/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
"""
@copyright 2016 Chen Yang
@copyright 2017 Karel Brinda
Created by Chen Yang <[email protected]> (NanoSim)
Forked and modified by Karel Brinda <[email protected]> (NanoSim-H)
License: GPLv3
"""

from .simulate import *
from .train import *
from .mixed_models import *
Expand Down
29 changes: 20 additions & 9 deletions nanosimh/besthit_to_histogram.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,18 @@
#!/usr/bin/env python

"""
@copyright 2016 Chen Yang
@copyright 2017 Karel Brinda
Created by Chen Yang <[email protected]> (NanoSim)
Forked and modified by Karel Brinda <[email protected]> (NanoSim-H)
License: GPLv3
"""

from __future__ import with_statement
import numpy
import os

def add_dict(error, dic):
if error not in dic:
Expand All @@ -27,14 +38,14 @@ def add_match(prev, succ, match_list):
match_list[prev][succ] += 1


def hist(outfile):
out_match = open(outfile + "_match.hist", 'w')
out_mis = open(outfile + "_mis.hist", 'w')
out_ins = open(outfile + "_ins.hist", 'w')
out_del = open(outfile + "_del.hist", 'w')
out1 = open(outfile + "_error_markov_model", 'w')
out2 = open(outfile + "_match_markov_model", 'w')
out3 = open(outfile + "_first_match.hist", 'w')
def hist(model_dir):
out_match = open(os.path.join(model_dir,"match.hist"), 'w')
out_mis = open(os.path.join(model_dir,"mis.hist"), 'w')
out_ins = open(os.path.join(model_dir,"ins.hist"), 'w')
out_del = open(os.path.join(model_dir,"del.hist"), 'w')
out1 = open(os.path.join(model_dir,"error_markov_model"), 'w')
out2 = open(os.path.join(model_dir,"match_markov_model"), 'w')
out3 = open(os.path.join(model_dir,"first_match.hist"), 'w')

dic_match = {}
dic_first_match = {}
Expand Down Expand Up @@ -63,7 +74,7 @@ def hist(outfile):
dic_ins[x] = 0
dic_del[x] = 0

with open(outfile + "_besthit.maf", 'r') as f:
with open(os.path.join(model_dir,"besthit.maf"), 'r') as f:
for line in f:
prev_match = 0
prev_error = ""
Expand Down
15 changes: 13 additions & 2 deletions nanosimh/get_besthit.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,22 @@
#!/usr/bin/env python

"""
@copyright 2016 Chen Yang
@copyright 2017 Karel Brinda
Created by Chen Yang <[email protected]> (NanoSim)
Forked and modified by Karel Brinda <[email protected]> (NanoSim-H)
License: GPLv3
"""

from __future__ import with_statement

import os

def besthit_and_unaligned(infile, outmaf, outfile):
def besthit_and_unaligned(infile, outmaf, model_dir):
align_dict = {}
out1 = open(outfile + "_besthit.maf", 'w')
out1 = open(os.path.join(model_dir, "besthit.maf"), 'w')
unaligned_dict = {}

with open(outmaf, 'r') as f:
Expand Down
41 changes: 16 additions & 25 deletions nanosimh/head_align_tail_dist.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,23 @@
#!/usr/bin/env python

"""
Written by Chen Yang on Mar 25th, 2015
To get the length of head, aligned, and tail regions of an alignment.
@copyright 2016 Chen Yang
@copyright 2017 Karel Brinda
Created by Chen Yang <[email protected]> (NanoSim)
Forked and modified by Karel Brinda <[email protected]> (NanoSim-H)
Major change in Apr 22nd
License: GPLv3
Updated in Nov 25th
To get the length of head, aligned, and tail regions of an alignment.
"""


from __future__ import with_statement
import sys
import getopt
import numpy

try:
from six.moves import range
except ImportError:
pass
import os


def flex_bins(num_of_bins, ratio_dict, num_of_reads):
Expand Down Expand Up @@ -60,28 +61,18 @@ def flex_bins(num_of_bins, ratio_dict, num_of_reads):
return ratio_bins


def head_align_tail(outfile, num_of_bins):
out1 = open(outfile + '_aligned_length_ecdf', 'w')
out2 = open(outfile + '_aligned_reads_ecdf', 'w')
out3 = open(outfile + '_ht_ratio', 'w')
out4 = open(outfile + "_align_ratio", 'w')

'''
out5 = open(outfile + "_total.txt", 'w')
out6 = open(outfile + "_middle.txt", 'w')
out7 = open(outfile + "_head.txt", 'w')
out8 = open(outfile + "_middle_ref.txt", 'w')
out9 = open(outfile + "_ht.txt", 'w')
out10 = open(outfile + "_ratio.txt", 'w')
out11 = open(outfile + "_tail.txt", 'w')
'''
def head_align_tail(model_dir, num_of_bins):
out1 = open(os.path.join(model_dir,'aligned_length_ecdf'), 'w')
out2 = open(os.path.join(model_dir,'aligned_reads_ecdf'), 'w')
out3 = open(os.path.join(model_dir,'ht_ratio'), 'w')
out4 = open(os.path.join(model_dir,'align_ratio'), 'w')

aligned = []
total = []
ht_ratio = {}
align_ratio = {}

besthit_out = outfile + "_besthit.maf"
besthit_out = os.path.join(model_dir, "besthit.maf")
with open(besthit_out, 'r') as f:
for line in f:
ref = line.strip().split()
Expand Down
11 changes: 11 additions & 0 deletions nanosimh/misc.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,14 @@
"""
@copyright 2016 Chen Yang
@copyright 2017 Karel Brinda
Created by Chen Yang <[email protected]> (NanoSim)
Forked and modified by Karel Brinda <[email protected]> (NanoSim-H)
License: GPLv3
"""


import os

def assert_file_exists(fn,test_nonempty=False):
Expand Down
23 changes: 15 additions & 8 deletions nanosimh/mixed_models.py
Original file line number Diff line number Diff line change
@@ -1,32 +1,39 @@
#!/usr/bin/env python

"""
Created on Apr 28th by Chen Yang
@copyright 2016 Chen Yang
@copyright 2017 Karel Brinda
Created by Chen Yang <[email protected]> (NanoSim)
Forked and modified by Karel Brinda <[email protected]> (NanoSim-H)
License: GPLv3
This script is used to generate random numbers following certain mixed distribution models
"""

import numpy as np
import math
import numpy

# numpy.random.geometric generates positive integers, starting from 1,
# whereas rgeom in R generates values starting from 0


def pois_geom(lam, prob, weight):
    """Draw one random value from a Poisson/geometric mixture.

    A uniform random number in [0, 1) selects the component: if it is
    smaller than ``weight`` the value is a Poisson draw shifted by one,
    otherwise it is a geometric draw.

    Args:
        lam: Rate passed to ``numpy.random.poisson``.
        prob: Success probability passed to ``numpy.random.geometric``.
        weight: Probability of choosing the Poisson component.

    Returns:
        The sampled value. The Poisson branch adds 1 to its draw, and
        numpy's geometric distribution starts at 1, so the result is
        never smaller than 1.
    """
    # Each random number is drawn exactly once, through the single
    # ``numpy`` module name.
    if numpy.random.random() < weight:
        return numpy.random.poisson(lam) + 1
    return numpy.random.geometric(prob)


def wei_geom(lam, k, prob, weight):
tmp_rand = np.random.random()
tmp_rand = numpy.random.random()
if tmp_rand < weight:
value = int(round(math.ceil(lam * np.random.weibull(k))))
value = int(round(math.ceil(lam * numpy.random.weibull(k))))
else:
value = np.random.geometric(prob) - 1
value = numpy.random.geometric(prob) - 1

if value == 0:
value = 1
Expand Down
19 changes: 15 additions & 4 deletions nanosimh/model_fitting.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,14 @@
#############################################################################
# @copyright 2016 Chen Yang #
# @copyright 2017 Karel Brinda #
# #
# Created by Chen Yang <[email protected]> (NanoSim) #
# Forked and modified by Karel Brinda <[email protected]> (NanoSim-H) #
# #
# License: GPL #
#############################################################################


library(stats4)

# Ensure that the results will be reproducible
Expand All @@ -8,9 +19,9 @@ set.seed(1)
args <- commandArgs(TRUE)
eval(parse(text=args[[1]]))
# prefix <- "R9/1D/test"
mis_file <- paste(prefix, "_mis.hist", sep="")
ins_file <- paste(prefix, "_ins.hist", sep="")
del_file <- paste(prefix, "_del.hist", sep="")
mis_file <- paste(prefix, "/mis.hist", sep="")
ins_file <- paste(prefix, "/ins.hist", sep="")
del_file <- paste(prefix, "/del.hist", sep="")

mis <- read.delim(mis_file)
mis.freq <- mis$Mismatches.
Expand Down Expand Up @@ -196,5 +207,5 @@ model_fit.table <- data.frame(Type = c("mismatch", "insertion", "deletion"),
prob = c(mis.fit[2], ins.fit[3], del.fit[3]),
weight = c(mis.fit[3], ins.fit[4], del.fit[4]))

out_file <- paste(prefix, "_model_profile", sep="")
out_file <- paste(prefix, "/model_profile", sep="")
write.table(model_fit.table, out_file, row.names = FALSE, quote = FALSE, sep = "\t")
Loading

0 comments on commit 8aae817

Please sign in to comment.