Skip to content

Commit

Permalink
v3.0.2 release prep
Browse files Browse the repository at this point in the history
  • Loading branch information
M. Boemo committed Jul 28, 2022
1 parent 7e4be09 commit 5d4869c
Show file tree
Hide file tree
Showing 83 changed files with 2,061 additions and 5,213 deletions.
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ examples/
hdf5-1.8.14/
tensorflow/
hdf5-1.8.14.tar.gz
htslib/

#generated
.depend
Expand All @@ -26,8 +27,9 @@ hdf5-1.8.14.tar.gz
.pydevproject
.settings/*

#commit log file
#commit log file and path
src/gitcommit.h
src/softwarepath.h

#plots
*.png
Expand Down
51 changes: 34 additions & 17 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,30 +1,43 @@
CC = gcc
CXX = g++
DEBUG = -g
LIBFLAGS =
LIBFLAGS = -lrt
LDFLAGS ?= -ldl -llzma -lbz2 -lm -lz
CXXFLAGS = -Wall -O2 -fopenmp -std=c++14
CFLAGS = -Wall -std=c99 -O2

SPACE:= ;
SPACE+=;
null :=
space := ${null} ${null}
${space} := ${space}

CURRENT_PATH := $(subst $(lastword $(notdir $(MAKEFILE_LIST))),,$(subst $(SPACE),\$(SPACE),$(shell realpath '$(strip $(MAKEFILE_LIST))')))
PATH_SPACEFIX := $(subst ${space},\${space},${CURRENT_PATH})

ifeq ($(zstd),1)
LDFLAGS += -lzstd
endif

#hdf5
H5_LIB = ./hdf5-1.8.14/hdf5/lib/libhdf5.a
H5_INCLUDE = -I./hdf5-1.8.14/hdf5/include
LIBFLAGS += -Wl,-rpath,$(dir $(abspath $(lastword $(MAKEFILE_LIST))))hdf5-1.8.14/hdf5/lib -L hdf5-1.8.14/hdf5/lib -lhdf5

#hts
HTS_LIB = ./htslib/libhts.a
HTS_INCLUDE = -I./htslib
LIBFLAGS += -Wl,-rpath,$(dir $(abspath $(lastword $(MAKEFILE_LIST))))htslib -L htslib/ -lhts

#tensorflow
TENS_LIB = ./tensorflow/include/tensorflow/c/c_api.h
TENS_DEPEND = tensorflow/include/tensorflow/c/c_api.h
TENS_LIB = -Wl,-rpath,${PATH_SPACEFIX}tensorflow/lib -L tensorflow/lib
TENS_INCLUDE = -I./tensorflow/include
LIBFLAGS += -Wl,-rpath,$(dir $(abspath $(lastword $(MAKEFILE_LIST))))tensorflow/lib -L tensorflow/lib -ltensorflow
LIBFLAGS = -ltensorflow

#fast5
FAST5_INCLUDE = -I./fast5/include

#add include flags for each library
CXXFLAGS += $(H5_INCLUDE) $(HTS_INCLUDE) $(FAST5_INCLUDE) $(TENS_INCLUDE)
CPPFLAGS += $(H5_INCLUDE) $(HTS_INCLUDE) $(FAST5_INCLUDE) $(TENS_INCLUDE)

MAIN_EXECUTABLE = bin/DNAscent

Expand All @@ -47,12 +60,12 @@ tensorflow/include/tensorflow/c/c_api.h:
if [ ! -e tensorflow/include/tensorflow/c/c_api.h ]; then \
mkdir tensorflow; \
cd tensorflow; \
wget https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-gpu-linux-x86_64-1.15.0.tar.gz; \
tar -xzf libtensorflow-gpu-linux-x86_64-1.15.0.tar.gz || exit 255; \
wget https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-gpu-linux-x86_64-2.4.1.tar.gz; \
tar -xzf libtensorflow-gpu-linux-x86_64-2.4.1.tar.gz || exit 255; \
cd ..; \
fi

SUBDIRS = src src/scrappie src/pfasta
SUBDIRS = src src/scrappie src/pfasta src/sgsmooth
CPP_SRC := $(foreach dir, $(SUBDIRS), $(wildcard $(dir)/*.cpp))
C_SRC := $(foreach dir, $(SUBDIRS), $(wildcard $(dir)/*.c))
EXE_SRC = src/DNAscent.cpp
Expand All @@ -61,27 +74,31 @@ EXE_SRC = src/DNAscent.cpp
src/gitcommit.h: .git/HEAD .git/index
echo "const char *gitcommit = \"$(shell git rev-parse HEAD)\";" > $@

#log the software path
src/softwarepath.h:
echo "const char *executablePath = \"${PATH_SPACEFIX}\";" > $@

#generate object names
CPP_OBJ = $(CPP_SRC:.cpp=.o)
C_OBJ = $(C_SRC:.c=.o)

depend: .depend

.depend: $(CPP_SRC) $(C_SRC) $(EXE_SRC) $(H5_LIB) $(TENS_LIB) src/gitcommit.h
.depend: $(CPP_SRC) $(C_SRC) $(EXE_SRC) $(H5_LIB) $(TENS_DEPEND) src/gitcommit.h src/softwarepath.h
rm -f ./.depend
$(CXX) $(CXXFLAGS) -MM $(CPP_SRC) $(C_SRC) > ./.depend;
$(CXX) $(CXXFLAGS) $(CPPFLAGS) -MM $(CPP_SRC) $(C_SRC) > ./.depend;

#compile each object
.cpp.o: src/gitcommit.h
$(CXX) -o $@ -c $(CXXFLAGS) -fPIC $<
.cpp.o: src/gitcommit.h src/softwarepath.h
$(CXX) -o $@ -c $(CXXFLAGS) $(CPPFLAGS) -fPIC $<

.c.o:
$(CC) -o $@ -c $(CFLAGS) $(H5_INCLUDE) -fPIC $<
$(CC) -o $@ -c $(CFLAGS) $(CPPFLAGS) $(H5_INCLUDE) -fPIC $<


#compile the main executable
$(MAIN_EXECUTABLE): src/DNAscent.o $(CPP_OBJ) $(C_OBJ) $(HTS_LIB) $(H5_LIB) $(TENS_LIB) src/gitcommit.h
$(CXX) -o $@ $(CXXFLAGS) -fPIC $(CPP_OBJ) $(C_OBJ) $(LIBFLAGS)
$(MAIN_EXECUTABLE): src/DNAscent.o $(CPP_OBJ) $(C_OBJ) $(HTS_LIB) $(H5_LIB) $(TENS_DEPEND) src/gitcommit.h src/softwarepath.h
$(CXX) -o $@ $(CXXFLAGS) $(CPPFLAGS) -fPIC $(CPP_OBJ) $(C_OBJ) $(HTS_LIB) $(H5_LIB) $(TENS_LIB) $(LIBFLAGS) $(LDFLAGS)

clean:
rm -f $(MAIN_EXECUTABLE) $(CPP_OBJ) $(C_OBJ) src/DNAscent.o gitcommit.h
rm -f $(MAIN_EXECUTABLE) $(CPP_OBJ) $(C_OBJ) src/DNAscent.o src/gitcommit.h src/softwarepath.h
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ git clone --recursive https://github.com/MBoemo/DNAscent.git
The DNAscent directory will appear in your current directory. Switch to the latest tagged version and compile the software by running:
```shell
cd DNAscent
git checkout 2.0.2
git checkout 3.0.2
make
```
This will put the DNAscent executable into the DNAscent/bin directory. A typical compile time for DNAscent and its dependencies is 5 minutes.
Expand All @@ -21,6 +21,7 @@ Please see the [documentation](https://dnascent.readthedocs.io) for detailed usa

## Citation
Please cite the following if you use DNAscent for your research:
- Totanes FIG, Gockel J, Chapman SE, Bartfai R, Boemo MA, Merrick CJ. Replication origin mapping in the malaria parasite Plasmodium falciparum. bioRxiv. [[bioRxiv](https://doi.org/10.1101/2022.07.27.501677)]
- Boemo, MA. DNAscent v2: Detecting replication forks in nanopore sequencing data with deep learning. *BMC Genomics* 2021;22:430. [[Journal Link](https://doi.org/10.1186/s12864-021-07736-6)]
- Muller CA, Boemo MA, Spingardi P, Kessler BM, Kriaucionis S, Simpson JT, Nieduszynski CA. Capturing the dynamics of genome replication on individual ultra-long nanopore sequence reads. *Nature Methods* 2019;16:429-436. [[Journal Link](https://www.nature.com/articles/s41592-019-0394-y)]

Expand Down
Binary file removed dnn_models/BrdU_detect.pb
Binary file not shown.
Binary file added dnn_models/detect_model_BrdUEdU/saved_model.pb
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file removed dnn_models/forkSense.pb
Binary file not shown.
46 changes: 46 additions & 0 deletions docs/source/base.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
.. DNAscent documentation master file, created by
sphinx-quickstart on Fri Feb 7 18:58:49 2020.
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.
DNAscent
====================================

.. toctree::
:maxdepth: 1
:caption: Contents:

installation
index
detect
forkSense
visualisation
workflows
cookbook
releaseNotes

Overview
--------

DNAscent is software designed to detect the modified bases BrdU and EdU in Oxford Nanopore reads. In an experimental setup where BrdU and EdU are incorporated into nascent DNA by replication forks, this software can be used to answer questions that were traditionally answered by DNA fibre analysis.

At present, the only Oxford Nanopore flow cells supported by DNAscent are R9.4.1. The Flongle, MinION, GridION, and PromethION platforms are all supported.

DNAscent is under active development by the `Boemo Group <https://www.boemogroup.org/>`_ based in the `Department of Pathology, University of Cambridge <https://www.path.cam.ac.uk/>`_. We aim to push regular updates and improvements, and incorporating new functionality is an active area of our computational research.


Publications
------------

If you use DNAscent for your research, please cite the following publications:

Totanes FIG, Gockel J, Chapman SE, Bartfai R, Boemo MA, Merrick CJ. Replication origin mapping in the malaria parasite Plasmodium falciparum. [`bioRxiv <https://doi.org/10.1101/2022.07.27.501677>`_]

Boemo, MA. DNAscent v2: Detecting replication forks in nanopore sequencing data with deep learning. BMC Genomics 2021;22:430. [`Journal DOI <https://doi.org/10.1186/s12864-021-07736-6>`_]

Muller CA, Boemo MA, Spingardi P, Kessler BM, Kriaucionis S, Simpson JT, Nieduszynski CA. Capturing the dynamics of genome replication on individual ultra-long nanopore sequence reads. Nature Methods 2019;16:429-436. [`Journal DOI <https://doi.org/10.1038/s41592-019-0394-y>`_]

Bugs, Questions, and Comments
-----------------------------

Should any bugs arise or if you have any questions about usage, please raise a `GitHub issue <https://github.com/MBoemo/DNAscent/issues>`_. If you have comments or suggestions to improve the software or the documentation, please email Michael Boemo at [email protected].
8 changes: 4 additions & 4 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,11 @@
# -- Project information -----------------------------------------------------

project = 'DNAscent'
copyright = '2020, Michael A. Boemo'
copyright = '2022, Michael A. Boemo'
author = 'Michael A. Boemo'

# The full version, including alpha/beta/rc tags
release = '2.0.2'
release = '3.0.2'


# -- General configuration ---------------------------------------------------
Expand All @@ -41,7 +41,7 @@
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = []

master_doc = 'index'
master_doc = 'base'

# -- Options for HTML output -------------------------------------------------

Expand All @@ -53,4 +53,4 @@
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = []
html_static_path = ['_static']
84 changes: 31 additions & 53 deletions docs/source/cookbook.rst
Original file line number Diff line number Diff line change
Expand Up @@ -29,73 +29,51 @@ The following barebones script parses the output of ``DNAscent detect``. We ite
strand = splitLine[4]
else:
posOnRef = int(splitLine[0])
probBrdU = float(splitLine[1])
sixMerOnRef = splitLine[2]
probEdU = float(splitLine[1])
probBrdU = float(splitLine[2])
sixMerOnRef = splitLine[3]
#add these values to a container or do some processing here
f.close()
The following barebones script parses the output of ``DNAscent forkSense``. Note the similarity to the above script: All DNAscent output files were designed to have a very similar format to aid user processing.
.. code-block:: python
f = open('path/to/output.forkSense','r')
for line in f:
#ignore the header lines
if line[0] == '#':
continue
#split the line into a list by whitespace
splitLine = line.rstrip().split()
if line[0] == '>':
readID = splitLine[0][1:]
chromosome = splitLine[1]
refStart = int(splitLine[2])
refEnd = int(splitLine[3])
strand = splitLine[4]
else:
posOnRef = int(splitLine[0])
probLeftFork = float(splitLine[1])
probRightFork = float(splitLine[2])
#add these values to a container or do some processing here
f.close()
And again for ``DNAscent regions``:
The following example plots a histogram of fork track lengths from BED files generated by ``DNAscent forkSense``.

.. code-block:: python
f = open('path/to/output.regions','r')
import matplotlib
from matplotlib import pyplot as plt
for line in f:
fnames = ['leftForks_DNAscent_forkSense.bed','rightForks_DNAscent_forkSense.bed']
#ignore the header lines
if line[0] == '#':
continue
#split the line into a list by whitespace
splitLine = line.rstrip().split()
forkLengths = []
if line[0] == '>':
for fn in fnames:
f = open(fn,'r')
readID = splitLine[0][1:]
chromosome = splitLine[1]
refStart = int(splitLine[2])
refEnd = int(splitLine[3])
strand = splitLine[4]
else:
regionStart = int(splitLine[0])
regionEnd = int(splitLine[1])
regionScore = float(splitLine[2])
for line in f:
#add these values to a container or do some processing here
#ignore the header lines
if line[0] == '#':
continue
#split the line into a list by whitespace
splitLine = line.rstrip().split()
lbound = int(splitLine[1])
rbound = int(splitLine[2])
forkLengths.append(rbound-lbound)
f.close()
f.close()
plt.figure()
plt.hist(forkLengths)
plt.xlabel('Fork Track Length (bp)')
plt.ylabel('Count')
plt.savefig('forkTrackLen.pdf')
plt.close()
Loading

0 comments on commit 5d4869c

Please sign in to comment.