diff --git a/ChangeLog b/ChangeLog index c9b7199c8e..fce4647316 100644 --- a/ChangeLog +++ b/ChangeLog @@ -132,6 +132,10 @@ * doc/dev/coding-guidelines-and-review.rst: update developer docs with compensatory changes +2016-02-15 Kevin Murray + + * scripts/load-into-counting.py: Insert khmer's version into .info files + 2015-08-14 Luiz Irber * lib/subset.cc: check iterator before decrementing in @@ -331,7 +335,7 @@ * scripts/*.py, khmer/khmer_args.py: added epilog sanitation * scripts/{load-into-counting,load-graph,load-into-countgraph, load-into-nodegraph}.py, tests/{test_scripts,test_normalize_by_median, - test_streaming_io,test_countgraph}: renamed load-into-counting -> + test_streaming_io,test_countgraph}: renamed load-into-counting -> load-into-countgraph, load-graph -> load-into-nodegraph, fixed tests to not bork @@ -344,7 +348,7 @@ * CITATION, doc/{index,introduction,user/scripts}.rst, khmer/khmer_args.py: formatting fixes and new citation for the software as a whole * Makefile: PDF building hints, tweaked dependencies, update coverity URL, - new target to generate author list for the paper citation + new target to generate author list for the paper citation * sort-authors-list.py: helper script for the above * doc/conf.py: don't generate a module index * doc/contributors.rst: formatting, remove references to old lab @@ -376,18 +380,18 @@ and KmerIterator, respectively. * lib/Makefile: Add -std=c++11 flag. * Makefile: Update -std=c++11 flag in libtest target. - * lib/hashtable.{cc,hh}: Update calc_connected_graph_size to use Traverser. + * lib/hashtable.{cc,hh}: Update calc_connected_graph_size to use Traverser. Change kmer_degree to use functions from traversal.cc. Remove redundant count_kmers_with_radius in favor of calc_connected_graph_size. Update traverse_from_kmer to use Traverser. Hashtable subclasses KmerFactory. * lib/{hashtable.hh,kmer_hash.{cc,hh}}: Move KmerIterator from hashtable.hh to kmer_hash.{cc,hh}. Add Kmer class to store forward, reverse, and uniqified integer representations of k-mers, and to handle string - conversion. Update KmerIterator to emit objects of type Kmer and to subclass + conversion. Update KmerIterator to emit objects of type Kmer and to subclass KmerFactory; add doxygen markup. * lib/khmer.hh: Forward declare Kmer and typedef new Kmer data structures. * lib/subset.{cc,hh}: Move constructor definition to .cc file. Remove - queue_neighbors in favor of new traversal machinery. Update find_all_tags, + queue_neighbors in favor of new traversal machinery. Update find_all_tags, sweep_for_tags, and find_all_tags_truncate_on_abundance to use Traverser. * setup.py: Add traversal.{cc,hh} to deps. @@ -453,9 +457,9 @@ 2015-08-04 Jacob Fenton - * khmer/khmer_args.py, oxli/functions.py: migrated estimation functions out + * khmer/khmer_args.py, oxli/functions.py: migrated estimation functions out oxli and into khmer_args - * oxli/build_graph.py, tests/test_oxli_functions.py, + * oxli/build_graph.py, tests/test_oxli_functions.py, sandbox/{estimate_optimal_hash,optimal_args_hashbits}.py, scripts/{normalize-by-median,unique-kmers}.py: changed to not break on location change @@ -680,7 +684,7 @@ abundance-dist}.py,tests/test_{normalize_by_median,subset_graph,hashbits, oxli_function}.py: pylint cleanup. -2015-07-17 Michael R. Crusoe +2015-07-17 Michael R. Crusoe * Makefile, tests/test_read_aligner.py: import khmer when pylinting. @@ -732,7 +736,7 @@ 2015-07-05 Jacob Fenton - * doc/whats-new-2.0.rst: added in normalize-by-median.py broken paired + * doc/whats-new-2.0.rst: added in normalize-by-median.py broken paired updates. 2015-07-05 Michael R. Crusoe @@ -809,7 +813,7 @@ 2015-06-28 Qingpeng Zhang - * sandbox/{estimate_optimal_hash,optimal_args_hashbits}.py: added sandbox + * sandbox/{estimate_optimal_hash,optimal_args_hashbits}.py: added sandbox methods for estimating memory usage based on desired fp rate, etc. 2015-06-27 Kevin Murray @@ -1147,7 +1151,7 @@ 2015-04-17 Jessica Mizzi - * tests/test_scripts.py: split test_extract_long_sequences + * tests/test_scripts.py: split test_extract_long_sequences into test_extract_long_sequences_fa and test_extract_long_sequences_fq 2015-04-15 Elmar Bucher @@ -1183,7 +1187,7 @@ 2015-04-15 Sarah Guermond * scripts/trim-low-abund.py: implemented STDOUT output, redirected - existing print statements to STDERR, fixed existing & new PEP 8 issues + existing print statements to STDERR, fixed existing & new PEP 8 issues * tests/test_scripts.py: added test for above changes 2014-04-15 Andreas Härpfer @@ -1197,25 +1201,25 @@ 2015-04-15 Susan Steinman - * khmer/scripts/normalize-by-median.py: pass individual arg values to + * khmer/scripts/normalize-by-median.py: pass individual arg values to functions instead of ArgParse object 2015-04-15 Thomas Fenzl - * scripts/{count-overlap.py,readstats.py},tests/test_scripts.py: + * scripts/{count-overlap.py,readstats.py},tests/test_scripts.py: added a --csv option to readstats updated documentation for count-overlap - * khmer/_khmermodule.cc: fixed missing error handling + * khmer/_khmermodule.cc: fixed missing error handling for hashbits_count_overlap 2015-04-15 en zyme * khmer/khmer/kfile.py: check_file_status() -> check_input_files() - * khmer/sandbox/{collect-reads, khmer/sandbox/sweep-reads}.py + * khmer/sandbox/{collect-reads, khmer/sandbox/sweep-reads}.py khmer/scripts/{abundance-dist-single, abundance-dist, annotate-partitions, - count-median, count-overlap, do-partition, extract-paired-reads, + count-median, count-overlap, do-partition, extract-paired-reads, extract-partitions, filter-abund-single, filter-abund, filter-stoptags, - find-knots, interleave-reads, load-graph, load-into-counting, + find-knots, interleave-reads, load-graph, load-into-counting, make-initial-stoptags, merge-partitions, partition-graph, sample-reads-randomly, split-paired-reads}.py: check_file_status() -> check_input_files() @@ -1291,7 +1295,7 @@ 2015-04-13 Thomas Fenzl - * lib/{khmer_exception.hh,{counting,hashbits,hashtable,subset}.cc}: changed + * lib/{khmer_exception.hh,{counting,hashbits,hashtable,subset}.cc}: changed khmer_exception to use std::string to fix memory management. 2015-04-13 Elmar Bucher @@ -1318,7 +1322,7 @@ 2015-04-13 David Lin - * scripts/abundance-dist.py: disambiguate documentation for force and + * scripts/abundance-dist.py: disambiguate documentation for force and squash options 2015-04-13 Michael R. Crusoe @@ -1338,7 +1342,7 @@ 2015-04-10 Jacob Fenton - * scripts/test-{scripts.py}: added test for check_file_writable using + * scripts/test-{scripts.py}: added test for check_file_writable using load_into_counting 2015-04-10 Phillip Garland @@ -1504,7 +1508,7 @@ * scripts/abundance-dist-single.py: Use CSV format for the histogram. * scripts/count-overlap.py: Use CSV format for the curve file output. Includes column headers. - * scripts/abundance-dist-single.py: Use CSV format for the histogram. + * scripts/abundance-dist-single.py: Use CSV format for the histogram. Includes column headers. * tests/test_scripts.py: add test functions for the --csv option in abundance-dist-single.py and count-overlap.py @@ -1516,9 +1520,9 @@ 2015-02-25 Aditi Gupta - * sandbox/{collect-reads.py, correct-errors.py, - normalize-by-median-pct.py, slice-reads-by-coverage.py, - sweep-files.py, sweep-reads3.py, to-casava-1.8-fastq.py}: + * sandbox/{collect-reads.py, correct-errors.py, + normalize-by-median-pct.py, slice-reads-by-coverage.py, + sweep-files.py, sweep-reads3.py, to-casava-1.8-fastq.py}: Replaced 'accuracy' with 'quality'. Fixes #787. 2015-02-25 Tamer A. Mansour @@ -1748,7 +1752,7 @@ * khmer/utils.py: Added single write_record fuction to write FASTA/Q * scripts/{abundance-dist,extract-long-sequences,extract-partitions, - interleave-reads,normalize-by-median,sample-reads-randomly}.py: + interleave-reads,normalize-by-median,sample-reads-randomly}.py: Replaced FASTA/Q writing method with write_record 2015-01-23 Michael R. Crusoe @@ -1816,13 +1820,13 @@ 2015-01-09 Rhys Kidd * lib/khmer.hh: implement generic NONCOPYABLE() macro guard - * lib/hashtable.hh: apply NONCOPYABLE macro guard in case of future - modifications to Hashtable that might exposure potential memory corruption + * lib/hashtable.hh: apply NONCOPYABLE macro guard in case of future + modifications to Hashtable that might exposure potential memory corruption with default copy constructor 2014-12-30 Michael Wright - * tests/test_scripts.py: Attained complete testing coverage for + * tests/test_scripts.py: Attained complete testing coverage for scripts/filter_abund.py 2014-12-30 Brian Wyss @@ -1884,8 +1888,8 @@ filter-stoptags,interleave-reads,load-graph,load-into-counting, make-initial-stoptags,merge-partitions,normalize-by-median,partition-graph, sample-reads-randomly,split-paired-reads}.py,setup.cfg, - tests/{test_script_arguments,test_scripts}.py: Added force option to all - scripts to script IO sanity checks and updated tests to match. + tests/{test_script_arguments,test_scripts}.py: Added force option to all + scripts to script IO sanity checks and updated tests to match. 2014-12-17 Michael R. Crusoe @@ -2014,7 +2018,7 @@ 2014-11-11 Jacob Fenton - * do-partition.py: replaced threading args in scripts with things from + * do-partition.py: replaced threading args in scripts with things from khmer_args * khmer/theading_args.py: removed as it has been deprecated @@ -2042,7 +2046,7 @@ annotate-partitions, count-median, count-overlap, do-partition, extract-paired-reads, extract-partitions, filter-abund, filter-abund-single, filter-stoptags, find-knots, load-graph, load-into-counting, - make-initial-stoptags, merge-partitions, normalize-by-median, + make-initial-stoptags, merge-partitions, normalize-by-median, partition-graph, sample-reads-randomly}.py: changed stdout output in scripts to go to stderr. @@ -2073,11 +2077,11 @@ * scripts/{abundance-dist-single, abundance-dist, count-median, count-overlap, extract-paired-reads, filter-abund-single, load-graph, load-into-counting, make-initial-stoptags, - partition-graph, split-paired-reads}.py: + partition-graph, split-paired-reads}.py: added output file listing at end of file * scripts/extract-long-sequences.py: refactored to set write_out to sys.stdout by default; added output location listing. - * scripts/{fastq-to-fasta, interleave-reads}.py: + * scripts/{fastq-to-fasta, interleave-reads}.py: added output file listing sensitive to optional -o argument * tests/test_scripts.py: added test for scripts/make-initial-stoptags.py @@ -2093,7 +2097,7 @@ .empty() instead of .size() 2014-09-19 Ben Taylor - + * Makefile: Add astyle, format targets * doc/dev/coding-guidelines-and-review.txt: Add reference to `make format` target @@ -2106,7 +2110,7 @@ average cutoff. * sandbox/slice-reads-by-coverage.py: added script to extract reads with a specific coverage slice (based on median k-mer abundance). - + 2014-09-09 Titus Brown * Added sandbox/README.rst to describe/reference removed files, @@ -2238,7 +2242,7 @@ * khmer/thread_utils.py, sandbox/filter-below-abund.py, scripts/{extract-long-sequences,load-graph,load-into-counting, normalize-by-median,split-paired-reads}.py, - scripts/galaxy/gedlab.py: fix minor PyLint issues + scripts/galaxy/gedlab.py: fix minor PyLint issues 2014-08-20 Michael R. Crusoe @@ -2246,7 +2250,7 @@ 2014-08-20 Rhys Kidd - * setup.py,README.rst,doc/user/install.txt: Test requirement for a + * setup.py,README.rst,doc/user/install.txt: Test requirement for a 64-bit operating system, documentation changes. Fixes #529 2014-08-19 Michael R. Crusoe @@ -2333,7 +2337,7 @@ 2014-07-23 Qingpeng Zhang - * scripts/load-graph.py: write fp rate into *.info file with option + * scripts/load-graph.py: write fp rate into *.info file with option to switch on * tests/test_scripts.py: add test_load_graph_write_fp @@ -2343,16 +2347,16 @@ 2014-07-23 Leonor Garcia-Gutierrez - * tests/test_hashbits.py, tests/test_graph.py, + * tests/test_hashbits.py, tests/test_graph.py, tests/test_lump.py: reduced memory requirement - + 2014-07-23 Heather L. Wiencko * khmer_tst_utils.py: added import traceback * test_scripts.py: added test for normalize_by_median.py for fpr rate 2014-07-22 Justin Lippi - + * khmer/_khmermodule.cc: removed unused assignment * lib/read_aligner.cc,lib/read_aligner.hh: wrapped function declarations in the same compiler options that the only invocations are in to avoid @@ -2375,7 +2379,7 @@ * scripts/filter-abund.py: no longer asks for parameters that are unused, issue #524 -2014-07-22 Justin Lippi +2014-07-22 Justin Lippi * tests/khmer_tst_utils.py: put runscript here * tests/test_sandbox_scripts.py: remove 'runsandbox', renamed to runscript @@ -2389,11 +2393,11 @@ 2014-07-22 Rodney Picett * lib/scoringmatrix.{cc,hh}: removed assign function, issue #502 - + 2014-07-22 Leonor Garcia-Gutierrez * tests/test_counting_single.py: reduced memory requirements - + 2014-07-21 Titus Brown * sandbox/saturate-by-median.py: introduce new sandbox script for @@ -2414,7 +2418,7 @@ 2014-06-20 Chuck Pepe-Ranney - * scripts/extract-partitions.py: added epilog documentation for + * scripts/extract-partitions.py: added epilog documentation for .dist columns. 2014-06-20 Michael R. Crusoe @@ -2503,7 +2507,7 @@ 2014-06-11 Michael Wright - * scripts/load-into-counting: Fixed docstring misnomer to + * scripts/load-into-counting: Fixed docstring misnomer to load-into-counting.py 2014-06-10 Michael R. Crusoe @@ -2527,7 +2531,7 @@ 2014-06-10 Michael Wright - * scripts/extract-long-sequences: Moved from sandbox, added argparse and + * scripts/extract-long-sequences: Moved from sandbox, added argparse and FASTQ support. * scripts/fastq-to-fasta: Fixed outdated argparse oversight. * tests/test_scripts.py: Added tests for extract-long-sequences.py @@ -2545,7 +2549,7 @@ 2014-06-03 Chuck Pepe-Ranney - * scripts/abundance-dist.py: removed call to check_space on infiles. + * scripts/abundance-dist.py: removed call to check_space on infiles. 2014-05-31 Michael R. Crusoe @@ -2578,10 +2582,10 @@ 2014-05-22 Michael Wright - * scripts/fastq-to-fasta: Moved and improved fastq-to-fasta.py into scripts + * scripts/fastq-to-fasta: Moved and improved fastq-to-fasta.py into scripts from sandbox * tests/test_scripts.py: Added tests for fastq-to-fasta.py - * tests/test-data: Added test-fastq-n-to-fasta.py file with N's in + * tests/test-data: Added test-fastq-n-to-fasta.py file with N's in sequence for testing 2014-05-19 Michael R. Crusoe @@ -2631,7 +2635,7 @@ * sandbox/calc-best-assembly.py: added script to calculate best assembly from a list of contig/scaffold files - + 2014-04-23 Titus Brown * scripts/abundance-dist-single.py: fixed problem where ReadParser was @@ -2771,7 +2775,7 @@ * test_scripts.py: add test code for count-overlap.py * count-overlap.py: (fix a bug because of a typo and hashsize was replaced by min_hashsize) - * count-overlap.py: needs hashbits table generated by load-graph.py. + * count-overlap.py: needs hashbits table generated by load-graph.py. This information is added to the "usage:" line. * count-overlap.py: fix minor PyLint issues @@ -2854,7 +2858,7 @@ 2014-03-03 Michael R. Crusoe * lib/trace_logger.{cc,hh}: fix for Coverity CID 1063852: Uninitialized - scalar field (UNINIT_CTOR) + scalar field (UNINIT_CTOR) * lib/node.cc: fix for Coverity CID 1173035: Uninitialized scalar field (UNINIT_CTOR) * lib/hashbits.hh: fix for Coverity CID 1153101: Resource leak in object @@ -2884,7 +2888,7 @@ users without root access to install virtualenv instead of pip. * Added support for sparse graph labeling -* Added script to reinflate partitions from read files using the +* Added script to reinflate partitions from read files using the labeling system, called sweep-reads-by-partition-buffered.py * Implemented __new__ methods for Hashbits, enforced inheritance @@ -2952,14 +2956,14 @@ clarified and simplified. * khmer/_khmermodule.cc has gotten a once-over with cpychecker. Type errors were eliminated and the error checking has improved. -* Several fixes motivated by the results of a Coverity C/C++ scan. +* Several fixes motivated by the results of a Coverity C/C++ scan. * Tests that require greater than 0.5 gigabytes of memory are now annotated as being 'highmem' and be skipped by changing two lines in setup.cfg * warnings about -Wstrict-prototypes will no longer appear -* contributors to this release are: ctb, mr-c and camillescott. +* contributors to this release are: ctb, mr-c and camillescott. 2013-10-15 Michael R. Crusoe @@ -3082,7 +3086,7 @@ project to conform to the standard layout * test_scripts.py: add test code for count-overlap.py * count-overlap.py: (fix a bug because of a typo and hashsize was replaced by min_hashsize) - * count-overlap.py: needs hashbits table generated by load-graph.py. + * count-overlap.py: needs hashbits table generated by load-graph.py. This information is added to the "usage:" line. * count-overlap.py: fix minor PyLint issues @@ -3165,7 +3169,7 @@ project to conform to the standard layout 2014-03-03 Michael R. Crusoe * lib/trace_logger.{cc,hh}: fix for Coverity CID 1063852: Uninitialized - scalar field (UNINIT_CTOR) + scalar field (UNINIT_CTOR) * lib/node.cc: fix for Coverity CID 1173035: Uninitialized scalar field (UNINIT_CTOR) * lib/hashbits.hh: fix for Coverity CID 1153101: Resource leak in object @@ -3195,7 +3199,7 @@ users without root access to install virtualenv instead of pip. * Added support for sparse graph labeling -* Added script to reinflate partitions from read files using the +* Added script to reinflate partitions from read files using the labeling system, called sweep-reads-by-partition-buffered.py * Implemented __new__ methods for Hashbits, enforced inheritance @@ -3263,14 +3267,14 @@ clarified and simplified. * khmer/_khmermodule.cc has gotten a once-over with cpychecker. Type errors were eliminated and the error checking has improved. -* Several fixes motivated by the results of a Coverity C/C++ scan. +* Several fixes motivated by the results of a Coverity C/C++ scan. * Tests that require greater than 0.5 gigabytes of memory are now annotated as being 'highmem' and be skipped by changing two lines in setup.cfg * warnings about -Wstrict-prototypes will no longer appear -* contributors to this release are: ctb, mr-c and camillescott. +* contributors to this release are: ctb, mr-c and camillescott. 2013-10-15 Michael R. Crusoe diff --git a/scripts/load-into-counting.py b/scripts/load-into-counting.py index 0520c680e6..2a572c9c32 100755 --- a/scripts/load-into-counting.py +++ b/scripts/load-into-counting.py @@ -1,7 +1,7 @@ #! /usr/bin/env python # This file is part of khmer, https://github.com/dib-lab/khmer/, and is # Copyright (C) 2011-2015, Michigan State University. -# Copyright (C) 2015, The Regents of the University of California. +# Copyright (C) 2015-2016, The Regents of the University of California. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are @@ -117,16 +117,17 @@ def main(): check_space_for_graph(args.output_countgraph_filename, tablesize, args.force) + info_filename = base + ".info" check_file_writable(base) - check_file_writable(base + ".info") + check_file_writable(info_filename) print('Saving k-mer countgraph to %s' % base, file=sys.stderr) print('Loading kmers from sequences in %s' % repr(filenames), file=sys.stderr) # clobber the '.info' file now, as we always open in append mode below - if os.path.exists(base + '.info'): - os.remove(base + '.info') + with open(info_filename, 'w') as info_fp: + print('khmer version:', khmer.__version__, file=info_fp) print('making countgraph', file=sys.stderr) countgraph = khmer_args.create_countgraph(args) @@ -159,13 +160,13 @@ def main(): print('mid-save', base, file=sys.stderr) countgraph.save(base) - with open(base + '.info', 'a') as info_fh: + with open(info_filename, 'a') as info_fh: print('through', filename, file=info_fh) total_num_reads += rparser.num_reads n_kmers = countgraph.n_unique_kmers() print('Total number of unique k-mers:', n_kmers, file=sys.stderr) - with open(base + '.info', 'a') as info_fp: + with open(info_filename, 'a') as info_fp: print('Total number of unique k-mers:', n_kmers, file=info_fp) print('saving', base, file=sys.stderr) @@ -176,7 +177,7 @@ def main(): khmer.calc_expected_collisions( countgraph, args.force, max_false_pos=.2) - with open(base + '.info', 'a') as info_fp: + with open(info_filename, 'a') as info_fp: print('fp rate estimated to be %1.3f\n' % fp_rate, file=info_fp) if args.summary_info: @@ -209,7 +210,7 @@ def main(): print('fp rate estimated to be %1.3f' % fp_rate, file=sys.stderr) print('DONE.', file=sys.stderr) - print('wrote to:', base + '.info', file=sys.stderr) + print('wrote to:', info_filename, file=sys.stderr) if __name__ == '__main__': main() diff --git a/tests/test_scripts.py b/tests/test_scripts.py index e5db190af2..5c65411aa4 100644 --- a/tests/test_scripts.py +++ b/tests/test_scripts.py @@ -331,6 +331,29 @@ def test_load_into_counting_bad_summary_fmt(): assert "invalid choice: 'badfmt'" in err, err +def test_load_into_counting_info_version(): + script = 'load-into-counting.py' + args = ['-x', '1e5', '-N', '2', '-k', '20'] # use small HT + + outfile = utils.get_temp_filename('out') + infile = utils.get_test_data('random-20-a.fa') + + args.extend([outfile, infile]) + + (status, out, err) = utils.runscript(script, args) + + ht_file = outfile + assert os.path.exists(ht_file), ht_file + + info_file = outfile + '.info' + assert os.path.exists(info_file), info_file + with open(info_file) as info_fp: + versionline = info_fp.readline() + version = versionline.split(':')[1].strip() + assert versionline.startswith('khmer version:'), versionline + assert version == khmer.__version__, version + + def _make_counting(infilename, SIZE=1e7, N=2, K=20, BIGCOUNT=True): script = 'load-into-counting.py' args = ['-x', str(SIZE), '-N', str(N), '-k', str(K)]