diff --git a/.gitignore b/.gitignore index 6ac08067..4d9bd62d 100644 --- a/.gitignore +++ b/.gitignore @@ -34,3 +34,6 @@ perf.* analysis.txt *.naf .kdev4 +compile_commands.json +*.fastafs +*.zst diff --git a/CMakeLists.txt b/CMakeLists.txt index 5cfeee9a..5f8a45f9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,10 +5,11 @@ cmake_minimum_required(VERSION 2.8) project(fastafs) +# helps debugging: # Do this once in a while - find different compiler warnings #set(CMAKE_CXX_COMPILER "clang++") -set(PROJECT_VERSION "1.9.0") +set(PROJECT_VERSION "1.10.0") set(PACKAGE_URL "https://github.com/yhoogstrate/fastafs") set(PACKAGE_BUGREPORT "${PACKAGE_URL}/issues") @@ -112,7 +113,7 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread") # -DXXH_NAMESPACE=ZST_ if(DEBUG) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -pg -ggdb -Wconversion -D_FILE_OFFSET_BITS=64")# -Werror makes compilation crash when warnings are given (also part of Travis) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -pg -ggdb -Wconversion -D_FILE_OFFSET_BITS=64 -g -DBOOST_TEST_TOOLS_UNDER_DEBUGGER -DBOOST_TEST_TOOLS_DEBUGGABLE")# -Werror makes compilation crash when warnings are given (also part of Travis) else() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -D_FILE_OFFSET_BITS=64") endif() @@ -128,9 +129,12 @@ add_library(libfastafs SHARED src/flags.cpp src/fastafs.cpp src/ucsc2bit.cpp + + src/xbit_byte_encoder.cpp src/twobit_byte.cpp src/fourbit_byte.cpp src/fivebit_fivebytes.cpp + src/database.cpp src/utils.cpp src/sequence_region.cpp @@ -201,6 +205,11 @@ add_test(test_chunked_reader ${BUILD_TEST_DIR}/test_chunked_reader) set_target_properties(test_chunked_reader PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${BUILD_TEST_DIR}) target_link_libraries(test_chunked_reader libfastafs) +add_executable(test_database test/database/test_database.cpp) +set_target_properties(test_database PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${BUILD_TEST_DIR}) +target_link_libraries(test_database libfastafs) +add_test(test_database 
${BUILD_TEST_DIR}/test_database) + add_executable(test_fastafs_as_ucsc2bit test/fastafs/test_ucsc2bit.cpp) add_test(test_fastafs_as_ucsc2bit ${BUILD_TEST_DIR}/test_fastafs_as_ucsc2bit) set_target_properties(test_fastafs_as_ucsc2bit PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${BUILD_TEST_DIR}) diff --git a/Changelog b/Changelog index 25524c01..502ac8aa 100644 --- a/Changelog +++ b/Changelog @@ -1,3 +1,10 @@ +2023-01-22 + * v1.10.0 + * Better ninja/meson support + * Code clean-ups + * Restructured the chunked_reader class and subclasses according to + the design patterns philosophy + 2020-04-29 Youri Hoogstrate * v1.9.0 diff --git a/dependencies/zstd-seekable-adapted/zstdseek_utils.cpp b/dependencies/zstd-seekable-adapted/zstdseek_utils.cpp index 750bd775..d529d609 100644 --- a/dependencies/zstd-seekable-adapted/zstdseek_utils.cpp +++ b/dependencies/zstd-seekable-adapted/zstdseek_utils.cpp @@ -175,7 +175,6 @@ size_t ZSTD_seekable_decompressFile_orDie(ZSTD_seekable_decompress_init_data* fh size_t written = 0; if(fh->fin == NULL) { - printf("fin == NULL: YES!!\n"); exit(124); } //else { @@ -183,7 +182,6 @@ size_t ZSTD_seekable_decompressFile_orDie(ZSTD_seekable_decompress_init_data* fh //} if (feof(fh->fin)) { - printf ("!!!! FEOF !!!!! 
\n"); exit(123); } //else { @@ -232,6 +230,7 @@ size_t ZSTD_seekable_decompressFile_orDie(ZSTD_seekable_decompress_init_data* fh //fh->fin_locked = false; + return written; } diff --git a/fastafs.kdev4 b/fastafs.kdev4 new file mode 100644 index 00000000..b0cc0f08 --- /dev/null +++ b/fastafs.kdev4 @@ -0,0 +1,4 @@ +[Project] +CreatedFrom=CMakeLists.txt +Manager=KDevCMakeManager +Name=fastafs diff --git a/include/chunked_reader.hpp b/include/chunked_reader.hpp index 7079dc78..5d9e615a 100644 --- a/include/chunked_reader.hpp +++ b/include/chunked_reader.hpp @@ -19,49 +19,103 @@ #include "zstd_seekable_utils.hpp" -enum compression_type : signed char { +enum compression_type : signed char { // dit is State undefined = -1, uncompressed = 0, zstd = 1 }; +//@todo implement w/ state design pattern +//url: https://refactoring.guru/design-patterns/state -class chunked_reader +class chunked_reader; + +class State { +protected: + chunked_reader *context; // back-reference to context, to access file_i, filename etc. 
+ public: - chunked_reader(char *); // filename - chunked_reader(const char *); // filename - ~chunked_reader(); + virtual ~State() {}; + void set_context(chunked_reader *); + size_t read(unsigned char *, size_t, size_t &, size_t &); // reads from buffer, context a-specific + + // virtual functions: + virtual void fopen(off_t) = 0; + virtual size_t cache_buffer() = 0; // formerly update_..._buffer + virtual void seek(off_t) = 0; +}; // compression type + + + +class ContextUncompressed : public State +{ +private: + std::ifstream *fh = nullptr; + +public: + void fopen(off_t) override; + size_t cache_buffer() override; + void seek(off_t) override; // implements State::seek; marked override like the other overriding members + + ~ContextUncompressed() override; +}; + +class ContextZstdSeekable : public State +{ +private: + ZSTD_seekable_decompress_init_data* fh = nullptr; - void init(); // generic tasks needed for init + size_t const buffOutSize = ZSTD_DStreamOutSize(); + char* const buffOut = (char*) malloc_orDie(buffOutSize); + ZSTD_seekable* const seekable = ZSTD_seekable_create(); //@todo -> in constructor, check if not NULL - std::string filename; // try doing this with inode + size_t maxFileSize; - std::ifstream *fh_flat; - void update_flat_buffer(); +public: + void fopen(off_t) override; + size_t cache_buffer() override; + void seek(off_t) override; // implements State::seek; marked override like the other overriding members - ZSTD_seekable_decompress_init_data* fh_zstd; - void update_zstd_buffer(); + ~ContextZstdSeekable() override; +}; - compression_type filetype; +class chunked_reader // master chunked_reader +{ +protected: + std::string filename; + char buffer[READ_BUFFER_SIZE + 1]; + size_t buffer_i; size_t buffer_n; off_t file_i; - void set_filetype(); + State *state; - size_t read(char *, size_t);// @deprecate - size_t read(unsigned char *, size_t); - unsigned char read(); +public: + void TransitionTo(State *); // @todo rename to set_compression_type + chunked_reader(const char *) ; + ~chunked_reader(); + + State* find_state(); + const std::type_info& typeid_state(); + const std::string& get_filename(); + char* get_buffer(); 
+ + compression_type get_filetype(); + + void fopen(off_t); + size_t cache_buffer(); + size_t read(unsigned char *, size_t); void seek(off_t); size_t tell(); - //size_t size(); + size_t get_file_i(); }; diff --git a/include/database.hpp b/include/database.hpp index 98f44784..5094f10e 100644 --- a/include/database.hpp +++ b/include/database.hpp @@ -10,16 +10,18 @@ class database { - std::string path; - std::string idx;// current default: ~/.local/share/fastafs/ + const std::string path; + const std::string idx;// current default: ~/.local/share/fastafs/ //hash_map idx;// "test": path + "/" + tostr(i) + ".fastafs" public: - database(); + database(const std::string &); + + const static std::string get_default_dir(); void force_db_exists(); std::string add(char *); void load();// reads path + "/" + info.txt, only containing N void list();// 'ls' - std::string get(std::string); + std::string get(char *); }; diff --git a/include/fastafs.hpp b/include/fastafs.hpp index b423a2cd..609e5240 100644 --- a/include/fastafs.hpp +++ b/include/fastafs.hpp @@ -28,7 +28,7 @@ struct ffs2f_init_seq { std::vector m_starts;// file position based std::vector m_ends;// file position based - + const uint32_t filesize;// with padding and newlines [fastafs_seq->fasta_filesize(cache->padding_arg)] ffs2f_init_seq(const uint32_t padding, size_t n_blocks, size_t m_blocks, const uint32_t n_lines, const uint32_t filesize): @@ -46,7 +46,8 @@ struct ffs2f_init { ffs2f_init(size_t size, uint32_t padding_arg): padding_arg(padding_arg), sequences(size) {} - ~ffs2f_init(void) { + ~ffs2f_init(void) + { for(size_t i = 0; i < sequences.size(); i++) { delete sequences[i]; } @@ -78,7 +79,7 @@ class fastafs_seq uint32_t fasta_filesize(uint32_t padding); void view_fasta(ffs2f_init_seq*, chunked_reader &fh); - size_t view_sequence_region_size(ffs2f_init_seq*, sequence_region*, std::ifstream *); + size_t view_sequence_region_size(sequence_region*); uint32_t view_sequence_region(ffs2f_init_seq*, sequence_region*, 
char *, size_t, off_t, chunked_reader &); uint32_t view_fasta_chunk(ffs2f_init_seq*, char *, size_t, off_t, chunked_reader &); template uint32_t view_fasta_chunk_generalized(ffs2f_init_seq*, char *, size_t, off_t, chunked_reader &); @@ -113,7 +114,7 @@ class fastafs std::string name; std::string filename; compression_type filetype; - + std::vector data; uint32_t crc32f;// crc32 as found in fastafs file @@ -126,7 +127,7 @@ class fastafs void load(std::string); void view_fasta(ffs2f_init*); - size_t view_sequence_region_size(ffs2f_init*, const char *); // read stuff like "chr1:123-456" into the buffer + size_t view_sequence_region_size(const char *); // read stuff like "chr1:123-456" into the buffer uint32_t view_sequence_region(ffs2f_init*, const char *, char*, size_t, off_t); // read stuff like "chr1:123-456" into the buffer uint32_t view_fasta_chunk(ffs2f_init*, char*, size_t, off_t, chunked_reader &); uint32_t view_fasta_chunk(ffs2f_init*, char*, size_t, off_t); diff --git a/include/fivebit_fivebytes.hpp b/include/fivebit_fivebytes.hpp index 7240f6a8..d2bc46e0 100644 --- a/include/fivebit_fivebytes.hpp +++ b/include/fivebit_fivebytes.hpp @@ -39,8 +39,8 @@ class fivebit_fivebytes static unsigned char iterator_to_offset(unsigned int); static unsigned char decompressed_to_compressed_bytes(unsigned char); // when only 5/8 bytes are filled, only 4/5 bytes need to be written - static const off_t nucleotides_to_compressed_fileoffset(size_t); // file offset waarna gelezen kan worden - static const off_t nucleotides_to_compressed_offset(size_t);// aantal bytes nodig om zoveel data weg te schrijven + static off_t nucleotides_to_compressed_fileoffset(size_t); // file offset waarna gelezen kan worden + static off_t nucleotides_to_compressed_offset(size_t);// aantal bytes nodig om zoveel data weg te schrijven void next(chunked_reader &); // update the compressed data and set buffer to decompressed data diff --git a/include/flags.hpp b/include/flags.hpp index 
d6874d87..38a9a93a 100644 --- a/include/flags.hpp +++ b/include/flags.hpp @@ -6,13 +6,13 @@ #include -const unsigned char FASTAFS_BITFLAG_COMPLETE = 0; +const static unsigned char FASTAFS_BITFLAG_COMPLETE = 0; -const unsigned char FASTAFS_SEQUENCE_BITFLAG_SEQUENCE_TYPE_1 = 0; -const unsigned char FASTAFS_SEQUENCE_BITFLAG_SEQUENCE_TYPE_2 = 1; -// const unsigned char FASTAFS_SEQUENCE_BITFLAG_???? = 2 ; // is reserved -const unsigned char FASTAFS_SEQUENCE_BITFLAG_COMPLETE = 3; -const unsigned char FASTAFS_SEQUENCE_BITFLAG_CIRCULAR = 4; +const static unsigned char FASTAFS_SEQUENCE_BITFLAG_SEQUENCE_TYPE_1 = 0; +const static unsigned char FASTAFS_SEQUENCE_BITFLAG_SEQUENCE_TYPE_2 = 1; +// const static unsigned char FASTAFS_SEQUENCE_BITFLAG_???? = 2 ; // is reserved +const static unsigned char FASTAFS_SEQUENCE_BITFLAG_COMPLETE = 3; +const static unsigned char FASTAFS_SEQUENCE_BITFLAG_CIRCULAR = 4; @@ -38,23 +38,21 @@ constexpr std::array bitmasks = { }; -//#include "utils.hpp" - class twobit_flag { +private: + std::array bits; // 00000000 00000000 + protected: twobit_flag(); - std::array bits; // 00000000 00000000 - - // set by flag void set_flag(unsigned char, bool);// counting flag from bit 0(!) 
bool get_flag(unsigned char); public: - void set(char *); - std::array &get_bits(void); // get bit 0 or bit 1 + void set(unsigned char *); + std::array &get_bits(void); // get bit 0 or bit 1 - needed for exporting flags to file(s) }; @@ -62,10 +60,7 @@ class fastafs_flags : public twobit_flag { public: bool is_complete(); - bool is_incomplete() - { - return !this->is_complete(); - }; + bool is_incomplete(); void set_complete(); void set_incomplete(); @@ -83,25 +78,13 @@ class fastafs_sequence_flags : public twobit_flag bool is_protein(); // alphabet: 'ABCDEFGHIJKLMNOPQRSTUVWYZX*-' bool is_complete(); - bool is_incomplete() - { - return !this->is_complete(); - }; + bool is_incomplete(); // is not complete bool is_circular(); - bool is_linear() - { - return !this->is_circular(); - }; - - bool is_twobit() - { - return (this->is_dna() | this->is_rna()); - }; - bool is_fourbit() - { - return this->is_iupec_nucleotide(); - }; + bool is_linear(); // is not circular + + bool is_twobit(); + bool is_fourbit(); // set by entity diff --git a/include/fourbit_byte.hpp b/include/fourbit_byte.hpp index f6decccf..31e87e31 100644 --- a/include/fourbit_byte.hpp +++ b/include/fourbit_byte.hpp @@ -6,21 +6,22 @@ #include "config.hpp" #include "chunked_reader.hpp" +#include "xbit_byte_encoder.hpp" -class fourbit_byte +class fourbit_byte : public xbit_byte_encoder { public: - static const char fourbit_alhpabet[17]; + static const char xbit_byte_encoder::n_fill_unmasked = '-'; + static const char xbit_byte_encoder::n_fill_masked = '-'; + static const unsigned char xbit_byte_encoder::bits_per_nucleotide = 4; + + //static const char fourbit_alphabet[17]; static char encode_hash[256][3]; - static const char n_fill_unmasked = '-'; - static const char n_fill_masked = '-'; - static const unsigned char bits_per_nucleotide = 4; - static const char nucleotides_per_byte = 8 / bits_per_nucleotide ; // this is about compressed data - static const char nucleotides_per_chunk = 8 / bits_per_nucleotide ; 
// this is about decompressed chunks + static const char nucleotides_per_byte = 8 / fourbit_byte::bits_per_nucleotide ; // this is about compressed data + static const char nucleotides_per_chunk = 8 / fourbit_byte::bits_per_nucleotide ; // this is about decompressed chunks - unsigned char data; void set(unsigned char, unsigned char); void set(char*);// string met 4 bytes set char *get(void); @@ -28,10 +29,12 @@ class fourbit_byte static unsigned char iterator_to_offset(unsigned int); - static const off_t nucleotides_to_compressed_fileoffset(size_t); // file offset waarna gelezen kan worden - static const off_t nucleotides_to_compressed_offset(size_t);// aantal bytes nodig om zoveel data weg te schrijven + static off_t nucleotides_to_compressed_fileoffset(size_t); // file offset waarna gelezen kan worden + //static off_t nucleotides_to_compressed_offset(size_t);// aantal bytes nodig om zoveel data weg te schrijven - void next(chunked_reader &); // update the compressed data and set buffer to decompressed data + //@todo chunked reader should be in a function above this. 
+ //next(char *) should be implemented with decompressed content only + //void next(chunked_reader &); // update the compressed data and set buffer to decompressed data }; #endif diff --git a/include/sequence_region.hpp b/include/sequence_region.hpp index cb86ed87..f3854c26 100644 --- a/include/sequence_region.hpp +++ b/include/sequence_region.hpp @@ -19,21 +19,43 @@ -class sequence_region -{ -public: - sequence_region(char *); - sequence_region(const char *); - std::string seq_name; - bool has_defined_end; +class sequence_region +{ +private: + bool defined_end;//whether the requested region has a defined end position: (chr1:1-2) has; (chr1:1-) has not [NOTE(review): the original comment had these two examples swapped - confirm against parse()] off_t start; off_t end; -private: + std::string seq_name; + + void parse(const char *); + + +public: + sequence_region(char *); + sequence_region(const char * seqstr); + + + std::string get_seq_name() + { + return seq_name; + }; + off_t get_start_position(void) const + { + return start; + }; + off_t get_end_position(void) const + { + return end; + }; + bool has_defined_end(void) const + { + return defined_end; + }; }; diff --git a/include/twobit_byte.hpp b/include/twobit_byte.hpp index 798bee8c..e470216d 100644 --- a/include/twobit_byte.hpp +++ b/include/twobit_byte.hpp @@ -6,24 +6,25 @@ #include "config.hpp" #include "chunked_reader.hpp" +#include "xbit_byte_encoder.hpp" - -class twobit_byte +class twobit_byte : public xbit_byte_encoder { private: // things only needed by the compression [encoding, not decoding] public: + static const char xbit_byte_encoder::n_fill_unmasked = 'N'; + static const char xbit_byte_encoder::n_fill_masked = 'n'; + static const unsigned char xbit_byte_encoder::bits_per_nucleotide = 2; + char (&encode_hash)[256][5]; twobit_byte(char (&encode_hash_arg)[256][5]): encode_hash(encode_hash_arg) {}; - static const char n_fill_unmasked = 'N'; - static const char n_fill_masked = 'n'; - static const unsigned char bits_per_nucleotide = 2; - static const char nucleotides_per_byte = 8 / 
bits_per_nucleotide ; // this is about compressed data - static const char nucleotides_per_chunk = 8 / bits_per_nucleotide ; // this is about decompressed chunks - unsigned char data; // go private + static const char nucleotides_per_byte = 8 / twobit_byte::bits_per_nucleotide ; // this is about compressed data + static const char nucleotides_per_chunk = 8 / twobit_byte::bits_per_nucleotide ; // this is about decompressed chunks + void set(unsigned char, unsigned char); void set(char*);// string met 4 bytes set char *get(void); @@ -31,10 +32,10 @@ class twobit_byte static unsigned char iterator_to_offset(unsigned int); - static const off_t nucleotides_to_compressed_fileoffset(size_t); // file offset waarna gelezen kan worden - static const off_t nucleotides_to_compressed_offset(size_t);// aantal bytes nodig om zoveel data weg te schrijven + static off_t nucleotides_to_compressed_fileoffset(size_t); // file offset waarna gelezen kan worden + static off_t nucleotides_to_compressed_offset(size_t);// aantal bytes nodig om zoveel data weg te schrijven - void next(chunked_reader &); // update the compressed data + //void next(chunked_reader &); // update the compressed data }; diff --git a/include/utils.hpp b/include/utils.hpp index e5751f6f..b8a62b5d 100644 --- a/include/utils.hpp +++ b/include/utils.hpp @@ -1,13 +1,13 @@ -uint32_t fourbytes_to_uint(char *, unsigned char); -uint32_t fourbytes_to_uint_ucsc2bit(char *, unsigned char); +uint32_t fourbytes_to_uint(unsigned char *, unsigned char); +uint32_t fourbytes_to_uint_ucsc2bit(unsigned char *, unsigned char); // for flags uint16_t twobytes_to_uint(char *); void uint_to_twobytes(char *chars, uint16_t n); size_t remove_chars(char *s, int c, size_t l);// to remove - characters from string -void uint_to_fourbytes(char *, uint32_t); +void uint_to_fourbytes(unsigned char *, uint32_t); void uint_to_fourbytes_ucsc2bit(char *, uint32_t); char *human_readable_fs(uint32_t, char *); diff --git a/include/xbit_byte_encoder.hpp 
b/include/xbit_byte_encoder.hpp new file mode 100644 index 00000000..423fe24d --- /dev/null +++ b/include/xbit_byte_encoder.hpp @@ -0,0 +1,51 @@ + +#ifndef XBIT_BYTE_HPP +#define XBIT_BYTE_HPP + +#include +#include "config.hpp" + +#include "chunked_reader.hpp" + + + +class xbit_byte_encoder +{ +private: + +public: + // these members need to be redefined by the derived classes (twobit_byte, fourbit_byte) + static const char n_fill_unmasked; + static const char n_fill_masked; + + static const unsigned char bits_per_nucleotide; + + unsigned char data; // go private + + xbit_byte_encoder() {}; + + /* + char (&encode_hash)[256][5]; + twobit_byte(char (&encode_hash_arg)[256][5]): encode_hash(encode_hash_arg) {}; + + unsigned char data; // go private + void set(unsigned char, unsigned char); + void set(char*);// string with 4 bytes set + char *get(void); + char *get(unsigned char); + + static unsigned char iterator_to_offset(unsigned int); + + static const off_t nucleotides_to_compressed_fileoffset(size_t); // file offset from which reading can start + static const off_t nucleotides_to_compressed_offset(size_t);// number of bytes needed to write that much data + */ + + void next(chunked_reader &); // update the compressed data +}; + + + + + + +#endif diff --git a/meson.build b/meson.build index 615879a5..d694b46e 100644 --- a/meson.build +++ b/meson.build @@ -30,7 +30,30 @@ configuration_inc = include_directories('include') src = [ './dependencies/zstd-lib-common/xxhash.cpp', './dependencies/zstd-seekable-adapted/zstdseek_compress.cpp', './dependencies/zstd-seekable-adapted/zstdseek_decompress.cpp', './dependencies/zstd-seekable-adapted/zstdseek_utils.cpp', -'./src/chunked_reader.cpp', './src/database.cpp', './src/fastafs.cpp', './src/fasta_to_fastafs.cpp', './src/fivebit_fivebytes.cpp', './src/flags.cpp', './src/fourbit_byte.cpp', './src/fuse.cpp', './src/sequence_region.cpp', './src/twobit_byte.cpp', './src/ucsc2bit.cpp', './src/ucsc2bit_to_fastafs.cpp', './src/utils.cpp', './src/lsfastafs.cpp', 
'./src/main.cpp'] +'./src/chunked_reader.cpp', './src/database.cpp', './src/fastafs.cpp', './src/fasta_to_fastafs.cpp', './src/xbit_byte_encoder.cpp', './src/fivebit_fivebytes.cpp', './src/flags.cpp', './src/fourbit_byte.cpp', './src/fuse.cpp', './src/sequence_region.cpp', './src/twobit_byte.cpp', './src/ucsc2bit.cpp', './src/ucsc2bit_to_fastafs.cpp', './src/utils.cpp', './src/lsfastafs.cpp', +'./src/main.cpp' +] + +src__test_check = [ +'./dependencies/zstd-lib-common/xxhash.cpp', +'./dependencies/zstd-seekable-adapted/zstdseek_compress.cpp', './dependencies/zstd-seekable-adapted/zstdseek_decompress.cpp', './dependencies/zstd-seekable-adapted/zstdseek_utils.cpp', +'./src/chunked_reader.cpp', './src/database.cpp', './src/fastafs.cpp', './src/fasta_to_fastafs.cpp', './src/xbit_byte_encoder.cpp', './src/fivebit_fivebytes.cpp', './src/flags.cpp', './src/fourbit_byte.cpp', './src/fuse.cpp', './src/sequence_region.cpp', './src/twobit_byte.cpp', './src/ucsc2bit.cpp', './src/ucsc2bit_to_fastafs.cpp', './src/utils.cpp', './src/lsfastafs.cpp', +'./test/check/test_check.cpp' +] + +src__test_cache = [ +'./dependencies/zstd-lib-common/xxhash.cpp', +'./dependencies/zstd-seekable-adapted/zstdseek_compress.cpp', './dependencies/zstd-seekable-adapted/zstdseek_decompress.cpp', './dependencies/zstd-seekable-adapted/zstdseek_utils.cpp', +'./src/chunked_reader.cpp', './src/database.cpp', './src/fastafs.cpp', './src/fasta_to_fastafs.cpp', './src/xbit_byte_encoder.cpp', './src/fivebit_fivebytes.cpp', './src/flags.cpp', './src/fourbit_byte.cpp', './src/fuse.cpp', './src/sequence_region.cpp', './src/twobit_byte.cpp', './src/ucsc2bit.cpp', './src/ucsc2bit_to_fastafs.cpp', './src/utils.cpp', './src/lsfastafs.cpp', + './test/cache/test_cache.cpp' + ] + +src__test_chunked_reader = [ +'./dependencies/zstd-lib-common/xxhash.cpp', +'./dependencies/zstd-seekable-adapted/zstdseek_compress.cpp', './dependencies/zstd-seekable-adapted/zstdseek_decompress.cpp', 
'./dependencies/zstd-seekable-adapted/zstdseek_utils.cpp', +'./src/chunked_reader.cpp', './src/database.cpp', './src/fastafs.cpp', './src/fasta_to_fastafs.cpp', './src/xbit_byte_encoder.cpp', './src/fivebit_fivebytes.cpp', './src/flags.cpp', './src/fourbit_byte.cpp', './src/fuse.cpp', './src/sequence_region.cpp', './src/twobit_byte.cpp', './src/ucsc2bit.cpp', './src/ucsc2bit_to_fastafs.cpp', './src/utils.cpp', './src/lsfastafs.cpp', + './test/chunked_reader/test_chunked_reader.cpp' + ] incdir = include_directories('dependencies/zstd-seekable-adapted', './dependencies/zstd-lib-common', 'include') @@ -46,3 +69,15 @@ executable('fastafs', src, include_directories : incdir, dependencies: [crypto, openssl, fuse, zlib, zstd]) +executable('test_cache', src__test_cache, + include_directories : incdir, + dependencies: [crypto, openssl, fuse, zlib, zstd]) + +executable('test_check', src__test_check, + include_directories : incdir, + dependencies: [crypto, openssl, fuse, zlib, zstd]) + +executable('test_chunked_reader', src__test_chunked_reader, + include_directories : incdir, + dependencies: [crypto, openssl, fuse, zlib, zstd]) + diff --git a/src/chunked_reader.cpp b/src/chunked_reader.cpp index a93d83ac..5cada62b 100644 --- a/src/chunked_reader.cpp +++ b/src/chunked_reader.cpp @@ -3,244 +3,345 @@ -chunked_reader::chunked_reader(char * afilename) : - fh_flat(nullptr), fh_zstd(nullptr), buffer_i(0), buffer_n(0), file_i(0) +void State::set_context(chunked_reader *arg_context) { + this->context = arg_context; +} + +// This does not read the actual flat file, this copies its internal buffer to arg_buffer_to +size_t State::read(unsigned char *arg_buffer_to, size_t arg_buffer_to_size, + size_t &buffer_i, size_t &buffer_n) +{ +#if DEBUG + if(arg_buffer_to_size > READ_BUFFER_SIZE) { + throw std::runtime_error("[ContextUncompressed::read] Requested buffer size larger than internal context buffer.\n"); + } +#endif //DEBUG + + size_t written = 0; + const size_t n1 = std::min(buffer_n - 
buffer_i, arg_buffer_to_size);// number of characters to copy + + // copy current internal buffer completely + while(written < n1) { + arg_buffer_to[written++] = this->context->get_buffer()[buffer_i++]; + } - this->filename = realpath_cpp(afilename); - this->init(); + if(written < arg_buffer_to_size) { + this->context->cache_buffer();// needs to set n to 0 + + while(buffer_i < buffer_n and written < arg_buffer_to_size) { + arg_buffer_to[written++] = this->context->get_buffer()[buffer_i++]; + } + } + + + return written; } -chunked_reader::chunked_reader(const char * afilename) : - fh_flat(nullptr), fh_zstd(nullptr), buffer_i(0), buffer_n(0), file_i(0) + + + + +chunked_reader::chunked_reader(const char * arg_filename) : filename(arg_filename), buffer("\0"), buffer_i(0), buffer_n(0), file_i(0), state(nullptr) { - this->filename = realpath_cpp(afilename); - this->init(); + this->TransitionTo(this->find_state()); } chunked_reader::~chunked_reader() { - //printf("[chunked_reader::~chunked_reader] exterminate, destroy(!)\n"); + delete this->state; +} - if(this->fh_flat != nullptr) { - if(this->fh_flat->is_open()) { - this->fh_flat->close(); - } - delete this->fh_flat; - } - if(this->fh_zstd != nullptr) { - //ZSTD_seekable_free(this->fh_zstd->seekable); - fclose_orDie(this->fh_zstd->fin); +const std::string& chunked_reader::get_filename() +{ + return this->filename; +} + +char * chunked_reader::get_buffer() +{ + return this->buffer; +} - //delete this->fh_zstd->seekable; - //delete this->fh_zstd->fin; - delete this->fh_zstd; +//@todo remove and use typeid only +compression_type chunked_reader::get_filetype() +{ + if(this->typeid_state() == typeid(ContextUncompressed)) { + return compression_type::uncompressed; + } else if(this->typeid_state() == typeid(ContextZstdSeekable)) { + return compression_type::zstd; + } else { + return compression_type::undefined; } } -void chunked_reader::init() -{ - this->set_filetype(); - switch(this->filetype) { - case uncompressed: - 
this->fh_flat = new std::ifstream; - this->fh_flat->open(this->filename.c_str(), std::ios::in | std::ios::binary | std::ios::ate); - if(this->fh_flat->is_open()) { - this->fh_flat->seekg(0, std::ios::beg); - this->update_flat_buffer(); - } else { - throw std::runtime_error("[chunked_reader::init] Cannot open file for reading.\n"); - } - break; +size_t chunked_reader::cache_buffer() +{ + size_t s = this->state->cache_buffer(); + this->buffer_n = s; - case zstd: - //printf("[chunked_reader::init()] - init ZSTD_seekable_decompress_init_data* fh_zstd; \n"); - this->fh_zstd = ZSTD_seekable_decompressFile_init(this->filename.c_str()); - // make zstd handle - to be implemented later on - //ZSTD_seekable_decompress_data - break; + this->buffer_i = 0; + this->file_i += s; - default: - throw std::runtime_error("[chunked_reader::init] Should never happen - but avoids compiler warning.\n"); - break; - } + return s; } -void chunked_reader::set_filetype() +size_t chunked_reader::read(unsigned char *arg_buffer, size_t arg_buffer_size) { - if(is_zstd_file((const char*) this->filename.c_str())) { - this->filetype = zstd; - } else { - this->filetype = uncompressed; + //arg_buffer_size = std::min(arg_buffer_size, (size_t) READ_BUFFER_SIZE); +#if DEBUG + + if(arg_buffer == nullptr) { + throw std::runtime_error("[chunked_reader::read] Invalid / not allocated buffer.\n"); } + + if(arg_buffer_size > READ_BUFFER_SIZE) { + throw std::runtime_error("[chunked_reader::read] Requested buffer size larger than internal context buffer.\n"); + } + +#endif //DEBUG + + return this->state->read(arg_buffer, arg_buffer_size, this->buffer_i, this->buffer_n); + } -size_t chunked_reader::read(char *arg_buffer, size_t buffer_size) +void chunked_reader::TransitionTo(State *arg_state) { + if(this->state != nullptr) { + delete this->state; // delete and destruct previous state, incl file points, should also run fh.close(); etc. 
+ } - buffer_size = std::min(buffer_size, (size_t) READ_BUFFER_SIZE); - size_t written = 0; + this->state = arg_state; + this->state->set_context(this); +} - while(this->buffer_i < this->buffer_n and written < buffer_size) { - arg_buffer[written++] = this->buffer[this->buffer_i++]; - } +void chunked_reader::fopen(off_t file_offset) +{ + this->state->fopen(file_offset); // open file handle + this->cache_buffer(); // read into buffer +} +void chunked_reader::seek(off_t arg_offset) +{ + this->file_i = arg_offset; // @todo obtain return value from this->state->seek() and limit this + this->state->seek(arg_offset);// set file pointer + this->cache_buffer();// update internal buffer +} - /* - size_t n = std::min(this->buffer_n - this->buffer_i, buffer_size - written); - memcpy(&arg_buffer[written], &this->buffer[this->buffer_i] , n); - written += n; - this->buffer_i += n; - */ - if(written < buffer_size) { - // overwrite buffer - switch(this->filetype) { - case uncompressed: - this->update_flat_buffer(); - break; - case zstd: - this->update_zstd_buffer(); - break; - default: - throw std::runtime_error("[chunked_reader::read] reading from uninitialized object\n"); - break; - } +// position in the (decompressed) file: file_i minus the part of the cached buffer not yet consumed (buffer_n - buffer_i) +size_t chunked_reader::tell() +{ + //printf("Context :: tell: %i - %i + %i = %i\n", + //this->file_i , + //this->buffer_n , + //this->buffer_i , + //this->file_i - this->buffer_n + this->buffer_i); - // same loop again - while(this->buffer_i < this->buffer_n and written < buffer_size) { - arg_buffer[written++] = this->buffer[this->buffer_i++]; - } - /* - somehow memcpy is slightly slower - test again @ mom laptop - size_t n = std::min(this->buffer_n - this->buffer_i, buffer_size - written); - memcpy(&arg_buffer[written], &this->buffer[this->buffer_i] , n); - written += n; - this->buffer_i += n; - */ - } + return this->file_i - this->buffer_n + this->buffer_i; +} - return written; +size_t chunked_reader::get_file_i() +{ + return this->file_i; } +const 
std::type_info& chunked_reader::typeid_state() +{ + return typeid(*this->state); // somehow pointer is needed to return ContextSubvariant rather than State +} -size_t chunked_reader::read(unsigned char *arg_buffer, size_t buffer_size) +State *chunked_reader::find_state() { + if(is_zstd_file(this->filename.c_str())) { + return new ContextZstdSeekable; + } else { + return new ContextUncompressed; + } +} - buffer_size = std::min(buffer_size, (size_t) READ_BUFFER_SIZE); - size_t written = 0; +void ContextUncompressed::fopen(off_t start_pos = 0) +{ + if(this->fh != nullptr) { + throw std::runtime_error("[ContextUncompressed::fopen] opening a non closed reader.\n"); + } - while(this->buffer_i < this->buffer_n and written < buffer_size) { - arg_buffer[written++] = this->buffer[this->buffer_i++]; + this->fh = new std::ifstream; + this->fh->open(this->context->get_filename().c_str(), std::ios::in | std::ios::binary | std::ios::ate); + if(this->fh == nullptr) { + throw std::runtime_error("[ContextUncompressed::fopen] empty fh?\n"); } + if(this->fh->is_open()) { // @todo move to top-level fopen() + this->seek(start_pos); + } else { + throw std::runtime_error("[chunked_reader_old::init] Cannot open file for reading.\n"); + } +} - if(written < buffer_size) { - // overwrite buffer - switch(this->filetype) { - case uncompressed: - this->update_flat_buffer(); - break; - case zstd: - this->update_zstd_buffer(); - break; - default: - throw std::runtime_error("[chunked_reader::read] reading from uninitialized object\n"); - break; - } +size_t ContextUncompressed::cache_buffer() +{ +#if DEBUG + if(this->fh->tellg() == -1) { + throw std::runtime_error("ContextUncompressed::cache_buffer\n"); + } - // same loop again - while(this->buffer_i < this->buffer_n and written < buffer_size) { - arg_buffer[written++] = this->buffer[this->buffer_i++]; - } + if(this->context->get_buffer() == nullptr) { + throw std::runtime_error("ContextUncompressed::cache_buffer - no valid buffer?\n"); + } +#endif 
//DEBUG + + this->fh->read(this->context->get_buffer(), READ_BUFFER_SIZE); + + size_t s = (size_t) this->fh->gcount(); + /*printf("context uncompressed cache_buffer: %i\n", (int) s); + printf("%02hhX %02hhX %02hhX %02hhX %02hhX %02hhX %02hhX %02hhX\n", + this->context->get_buffer()[0], + this->context->get_buffer()[1], + this->context->get_buffer()[2], + this->context->get_buffer()[3], + this->context->get_buffer()[4], + this->context->get_buffer()[5], + this->context->get_buffer()[6], + this->context->get_buffer()[7] + );*/ + + if(this->fh->eof()) { + this->fh->clear(); + this->fh->seekg(0, std::ios::end); } - return written; + return s; } -// reads single byte from the buffer -unsigned char chunked_reader::read() +void ContextUncompressed::seek(off_t arg_offset) { - if(this->buffer_i >= this->buffer_n) { - switch(this->filetype) { - case uncompressed: - this->update_flat_buffer(); - break; - case zstd: - this->update_zstd_buffer(); - break; - default: - throw std::runtime_error("[chunked_reader::read] reading from uninitialized object\n"); - break; - } + if(!this->fh->is_open()) { + throw std::runtime_error("[ContextUncompressed::seek] unexpected closed filehandle found.\n"); } - return this->buffer[this->buffer_i++]; + this->fh->seekg(arg_offset, std::ios::beg); } - -void chunked_reader::update_flat_buffer() +ContextUncompressed::~ContextUncompressed() { - this->fh_flat->read(this->buffer, READ_BUFFER_SIZE); + if(this->fh != nullptr) { + this->fh->close(); + if(!this->fh) { + std::cerr << "[ContextUncompressed::~ContextUncompressed] unexpected closed filehandle found.\n"; + } - this->buffer_i = 0; - this->buffer_n = (size_t) this->fh_flat->gcount(); - this->file_i += this->buffer_n; + delete this->fh; + } } -void chunked_reader::update_zstd_buffer() + + +size_t ContextZstdSeekable::cache_buffer() { - //size_t written = ZSTD_seekable_decompressFile_orDie(this->filename.c_str(), this->file_i, this->buffer, this->file_i + READ_BUFFER_SIZE); - size_t written = 
ZSTD_seekable_decompressFile_orDie(this->fh_zstd, this->file_i, this->buffer, this->file_i + READ_BUFFER_SIZE); + //size_t written = ZSTD_seekable_decompressFile_orDie(this->fh_zstd, this->file_i, this->buffer, this->file_i + READ_BUFFER_SIZE); + //this->fh->read(this->context->get_buffer(), READ_BUFFER_SIZE); - this->buffer_i = 0; - this->buffer_n = written; - this->file_i += written; -} + // figure out the location in the decompressed file + size_t written = ZSTD_seekable_decompressFile_orDie( + this->fh, + this->context->get_file_i(), //this->context->file_i, + this->context->get_buffer(), + this->context->tell() + READ_BUFFER_SIZE //this->context->file_i + READ_BUFFER_SIZE + ); -void chunked_reader::seek(off_t offset) -{ - this->file_i = offset; + //printf("written = %i\n", written); + //printf("{{%s}}\n", this->context->get_buffer()); + + /* + { + #if DEBUG + if(this->fh->tellg() == -1) + { + throw std::runtime_error("ContextUncompressed::cache_buffer\n"); + } + #endif //DEBUG - switch(this->filetype) { - case uncompressed: - this->fh_flat->clear(); // reset error state + this->fh->read(this->context->get_buffer(), READ_BUFFER_SIZE); - if(!this->fh_flat->is_open()) { - this->fh_flat->open(this->filename.c_str(), std::ios::in | std::ios::binary | std::ios::ate); + size_t s = (size_t) this->fh->gcount(); + + if(this->fh->eof()) { + this->fh->clear(); + this->fh->seekg(0, std::ios::end); } - this->fh_flat->seekg(offset, std::ios::beg); - this->update_flat_buffer(); - break; - default: - this->update_zstd_buffer(); - break; + return s; } + */ + + //throw std::runtime_error("[ContextZstdSeekable::cache_buffer] not implemented.\n"); + + return written; } +void ContextZstdSeekable::fopen(off_t start_pos) +{ + if(this->fh != nullptr) { + throw std::runtime_error("[ContextZstdSeekable::fopen] opening a non closed reader.\n"); + } -size_t chunked_reader::tell() + + this->fh = ZSTD_seekable_decompressFile_init(this->context->get_filename().c_str()); + + + 
if((this->fh->fin == NULL) | feof(this->fh->fin)) { + throw std::runtime_error("[ContextZstdSeekable::fopen] not implemented.\n"); + } else { + fseek_orDie(this->fh->fin, start_pos, SEEK_SET);// set initial file handle to 0? + // this->fh->seekg(start_pos, std::ios::beg); + + size_t const initResult = ZSTD_seekable_initFile(this->seekable, fh->fin); + if(ZSTD_isError(initResult)) { + fprintf(stderr, "ZSTD_seekable_init() error : %s \n", ZSTD_getErrorName(initResult)); + exit(11); + } + + //@todo class member? + this->maxFileSize = ZSTD_seekable_getFileDecompressedSize(this->seekable); + } +} + +void ContextZstdSeekable::seek(off_t arg_offset) { - return this->file_i - this->buffer_n + this->buffer_i; + fseek_orDie(fh->fin, arg_offset, SEEK_SET); } +ContextZstdSeekable::~ContextZstdSeekable() +{ + if(this->fh != nullptr) { + //ZSTD_seekable_free(this->fh_zstd->seekable); + fclose_orDie(this->fh->fin); + + //delete this->fh_zstd->seekable; + //delete this->fh_zstd->fin; + + delete this->fh; + } + + //throw std::runtime_error("[ContextUncompressed::~ContextUncompressed] not implemented.\n"); +} diff --git a/src/database.cpp b/src/database.cpp index fd7028f4..3f0934df 100644 --- a/src/database.cpp +++ b/src/database.cpp @@ -5,11 +5,33 @@ #include #include #include +#include +#include +#include #include "database.hpp" #include "fastafs.hpp" #include "lsfastafs.hpp" + + +const std::string database::get_default_dir() +{ + const char* home_c = getenv("HOME"); + if(home_c == nullptr) { + struct passwd *pw = getpwuid(getuid()); + home_c = pw->pw_dir; + + if(home_c == nullptr) { + throw std::runtime_error("Could not deterimine home dir. 
Also, no $HOME environment variable is set."); + } + } + std::string home_s = std::string(home_c); + return home_s + "/.local/share/fastafs"; +} + + + void database::force_db_exists() { DIR *dir = opendir(this->path.c_str()); @@ -34,9 +56,9 @@ void database::force_db_exists() -database::database() : - path(std::string(getenv("HOME")) + "/.local/share/fastafs"), - idx(std::string(getenv("HOME")) + "/.local/share/fastafs/index") +database::database(const std::string &path_arg) : + path(path_arg), + idx(path_arg + "/index") { this->load(); } @@ -54,11 +76,11 @@ void database::list() std::ifstream infile(this->idx); std::string line; std::string version; - + while(std::getline(infile, line)) { std::string fname = this->path + "/" + line + ".fastafs"; bool zstd_seek = false; - + if(!file_exist(fname.c_str())) { fname = this->path + "/" + line + ".fastafs.zst"; zstd_seek = true; @@ -116,9 +138,15 @@ void database::list() } + + // @todo return a filestream to a particular file one day? std::string database::add(char *name) { + if(this->get(name) != "") { + throw std::runtime_error("Trying to add duplicate entry to database."); + } + std::ofstream outputFile; outputFile.open(this->idx, std::fstream::app); @@ -133,16 +161,16 @@ std::string database::add(char *name) /** * @brief searches for a filename that corresponds to the uid */ -std::string database::get(std::string fastafs_name_or_id) +std::string database::get(char *fastafs_name_or_id) { - std::string fname; + std::string fname = ""; std::ifstream infile(this->idx); std::string line; while(std::getline(infile, line, '\n')) { if(line.compare(fastafs_name_or_id) == 0) { fname = this->path + "/" + line + ".fastafs"; - + if(!file_exist(fname.c_str())) { fname = this->path + "/" + line + ".fastafs.zst"; } diff --git a/src/fasta_to_fastafs.cpp b/src/fasta_to_fastafs.cpp index 4b67d4b3..4d82ca49 100644 --- a/src/fasta_to_fastafs.cpp +++ b/src/fasta_to_fastafs.cpp @@ -111,7 +111,7 @@ void 
fasta_to_fastafs_seq::finish_sequence(std::ofstream &fh_fastafs) } #endif //DEBUG - char buffer[4 + 1]; + unsigned char buffer[4 + 1]; // (over)write number nucleotides std::streamoff index_file_position = fh_fastafs.tellp(); @@ -1347,7 +1347,7 @@ size_t fasta_to_fastafs(const std::string &fasta_file, const std::string &fastaf // write index/footer unsigned int index_file_position = (uint32_t) fh_fastafs.tellp(); - char buffer[4 + 1]; + unsigned char buffer[4 + 1]; uint_to_fourbytes(buffer, (uint32_t) index.size()); fh_fastafs.write(reinterpret_cast(&buffer), (size_t) 4); @@ -1402,7 +1402,7 @@ size_t fasta_to_fastafs(const std::string &fasta_file, const std::string &fastaf // close fastafs, calc crc32, re-open and save fh_fastafs.close(); uint32_t crc32c = file_crc32(fastafs_file, 4, written); - char byte_enc[5] = "\x00\x00\x00\x00"; + unsigned char byte_enc[5] = "\x00\x00\x00\x00"; uint_to_fourbytes(byte_enc, (uint32_t) crc32c); std::ofstream fh_fastafs2(fastafs_file.c_str(), std::ios::out | std::ios::binary | std::ios::app); if(fh_fastafs2.is_open()) { diff --git a/src/fastafs.cpp b/src/fastafs.cpp index d3dd7425..20485bbd 100644 --- a/src/fastafs.cpp +++ b/src/fastafs.cpp @@ -208,8 +208,8 @@ template inline uint32_t fastafs_seq::view_fasta_chunk_generalized( } uint32_t pos = (uint32_t) start_pos_in_fasta; - - + + size_t pos_limit = this->name.size() + 2; if(pos < pos_limit) { const std::string header = ">" + this->name + "\n"; @@ -218,15 +218,15 @@ template inline uint32_t fastafs_seq::view_fasta_chunk_generalized( const uint32_t copied = (uint32_t) header.copy(buffer, tocopy, pos); // effective size of copied data written += (uint32_t) copied; - + if(written >= buffer_size) { return written; } - + pos += (uint32_t) copied; } - const uint32_t offset_from_sequence_line = pos - pos_limit; + const uint32_t offset_from_sequence_line = (uint32_t)(pos - pos_limit); size_t n_block = cache->n_starts.size(); size_t m_block = cache->m_starts.size(); uint32_t 
newlines_passed = offset_from_sequence_line / (cache->padding + 1);// number of newlines passed (within the sequence part) @@ -303,13 +303,13 @@ template inline uint32_t fastafs_seq::view_fasta_chunk_generalized( } if(pos == cur_n_end) { - //if(pos == cache->n_ends[n_block]) { + //if(pos == cache->n_ends[n_block]) { n_block++; cur_n_end = cache->n_ends[n_block]; cur_n_start = cache->n_starts[n_block]; } if(pos == cur_m_end) { - //if(pos == cache->m_ends[m_block]) { + //if(pos == cache->m_ends[m_block]) { m_block++; cur_m_end = cache->m_ends[m_block]; cur_m_start = cache->m_starts[m_block]; @@ -348,28 +348,23 @@ template inline uint32_t fastafs_seq::view_fasta_chunk_generalized( -size_t fastafs_seq::view_sequence_region_size(ffs2f_init_seq* cache, sequence_region* sr, std::ifstream *fh) +size_t fastafs_seq::view_sequence_region_size(sequence_region* sr) { #if DEBUG - if(cache == nullptr) { - throw std::invalid_argument("fastafs_seq::view_sequence_region - error 01\n"); - } - if(sr == nullptr) { throw std::invalid_argument("fastafs_seq::view_sequence_region - error 02\n"); } - #endif size_t total_requested_size; - if(sr->has_defined_end) { - total_requested_size = std::min((size_t) this->n, (size_t) sr->end + 1); + if(sr->has_defined_end()) { + total_requested_size = std::min((size_t) this->n, (size_t) sr->get_end_position() + 1); } else { total_requested_size = this->n; } - total_requested_size -= sr->start; + total_requested_size -= sr->get_start_position(); return total_requested_size; } @@ -395,13 +390,13 @@ uint32_t fastafs_seq::view_sequence_region(ffs2f_init_seq* cache, uint32_t written = 0; size_t total_requested_size; - if(sr->has_defined_end) { - total_requested_size = std::min((size_t) this->n, (size_t) sr->end + 1); + if(sr->has_defined_end()) { + total_requested_size = std::min((size_t) this->n, (size_t) sr->get_end_position() + 1); } else { total_requested_size = this->n; } - total_requested_size -= sr->start; + total_requested_size -= 
sr->get_start_position(); total_requested_size -= offset; total_requested_size = std::min(size, total_requested_size); @@ -409,7 +404,7 @@ uint32_t fastafs_seq::view_sequence_region(ffs2f_init_seq* cache, cache, // ffs2f_init_seq* cache, buffer, // char *buffer (size_t) total_requested_size, // size_t buffer_size, - (off_t) 2 + this->name.size() + sr->start + offset, // offset is for chunked reading + (off_t) 2 + this->name.size() + sr->get_start_position() + offset, // offset is for chunked reading fh ); @@ -645,23 +640,26 @@ fastafs::~fastafs() void fastafs::load(std::string afilename) { std::streampos size; - char *memblock; + unsigned char *memblock; chunked_reader fh_in = chunked_reader(afilename.c_str()); { - this->filetype = fh_in.filetype; - - memblock = new char [20 + 1]; //sha1 is 20b + fh_in.fopen(0); + this->filetype = fh_in.get_filetype(); + + memblock = new unsigned char [20 + 1]; //sha1 is 20b // if a user can't compile this line, please replace it with C's // 'realpath' function and delete/free afterwards and send a PR //this->filename = std::filesystem::canonical(afilename);// this path must be absolute because if stuff gets send to FUSE, paths are relative to the FUSE process and probably systemd initialization this->filename = realpath_cpp(afilename); + size = (size_t) fh_in.read(memblock, 16); if(size < 16) { //file.close(); throw std::invalid_argument("Corrupt file: " + filename); } else { + fh_in.seek(0); uint32_t i; @@ -669,6 +667,7 @@ void fastafs::load(std::string afilename) fh_in.read(memblock, 14); memblock[16] = '\0'; + // check magic for(i = 0 ; i < 4; i++) { if(memblock[i] != FASTAFS_MAGIC[i]) { @@ -707,10 +706,11 @@ void fastafs::load(std::string afilename) // name size_t namesize = (unsigned char) memblock[0]; // cast to something that is large enough (> 128) - char name[namesize + 1]; + //char name[namesize + 1]; + unsigned char *name = new unsigned char[namesize + 1]; fh_in.read(name, namesize); name[(unsigned char) memblock[0]] = 
'\0'; - s->name = std::string(name); + s->name = std::string(reinterpret_cast(name)); // set cursor and save sequence data position fh_in.read(memblock, 4); @@ -801,6 +801,7 @@ void fastafs::view_fasta(ffs2f_init* cache) //std::ifstream file(this->filename.c_str(), std::ios::in | std::ios::binary | std::ios::ate); //if(file.is_open()) { chunked_reader fh = chunked_reader(this->filename.c_str()); + fh.fopen(0); for(uint32_t i = 0; i < this->data.size(); i++) { this->data[i]->view_fasta(cache->sequences[i], fh); @@ -827,22 +828,8 @@ ffs2f_init* fastafs::init_ffs2f(uint32_t padding, bool allow_masking) // estimates the whole file size of a file such as "/seq/chr1:56-" -size_t fastafs::view_sequence_region_size(ffs2f_init* cache, const char *seq_region_arg) +size_t fastafs::view_sequence_region_size(const char *seq_region_arg) { -#if DEBUG - if(cache == nullptr) { - throw std::invalid_argument("fastafs::view_sequence_region - error 01\n"); - } - - if(cache->padding_arg != 0) { - throw std::invalid_argument("fastafs::view_sequence_region - error 02\n"); - } - - if(cache->sequences.size() == 0) { - throw std::invalid_argument("fastafs::view_sequence_region - error 03\n"); - } -#endif - std::ifstream file(this->filename.c_str(), std::ios::in | std::ios::binary | std::ios::ate); if(file.is_open()) { // parse "chr..:..-.." 
string @@ -850,8 +837,8 @@ size_t fastafs::view_sequence_region_size(ffs2f_init* cache, const char *seq_reg // 02 : check if 'chr' is equals this->data[i].name for(size_t i = 0; i < this->data.size(); i++) { - if(sr.seq_name.compare(this->data[i]->name) == 0) { - return this->data[i]->view_sequence_region_size(cache->sequences[i], &sr, &file); + if(sr.get_seq_name().compare(this->data[i]->name) == 0) { + return this->data[i]->view_sequence_region_size(&sr); } } } @@ -878,6 +865,7 @@ uint32_t fastafs::view_sequence_region(ffs2f_init* cache, const char *seq_region #endif chunked_reader fh = chunked_reader(this->filename.c_str()); + fh.fopen(0); //std::ifstream file(this->filename.c_str(), std::ios::in | std::ios::binary | std::ios::ate); //if(file.is_open()) { // parse "chr..:..-.." string @@ -885,7 +873,7 @@ uint32_t fastafs::view_sequence_region(ffs2f_init* cache, const char *seq_region // 02 : check if 'chr' is equals this->data[i].name for(size_t i = 0; i < this->data.size(); i++) { - if(sr.seq_name.compare(this->data[i]->name) == 0) { + if(sr.get_seq_name().compare(this->data[i]->name) == 0) { return this->data[i]->view_sequence_region(cache->sequences[i], &sr, buffer, buffer_size, file_offset, fh); } } @@ -909,10 +897,13 @@ uint32_t fastafs::view_sequence_region(ffs2f_init* cache, const char *seq_region */ uint32_t fastafs::view_fasta_chunk(ffs2f_init* cache, char *buffer, size_t buffer_size, off_t file_offset) { - chunked_reader fh = chunked_reader(this->filename.c_str()); + fh.fopen(0); + + uint32_t s = this->view_fasta_chunk(cache, buffer, buffer_size, file_offset, fh); + //#printf("%02hhX %02hhX %02hhX %02hhX\n", buffer[0], buffer[1], buffer[2], buffer[3]); - return this->view_fasta_chunk(cache, buffer, buffer_size, file_offset, fh); + return s; } @@ -971,6 +962,7 @@ uint32_t fastafs::view_ucsc2bit_chunk(char *buffer, size_t buffer_size, off_t fi //std::ifstream file(this->filename.c_str(), std::ios::in | std::ios::binary | std::ios::ate); 
//if(file.is_open()) { chunked_reader file = chunked_reader(this->filename.c_str()); + file.fopen(0); char n_seq[4]; pos_limit += 4;// skip this loop after writing first four bytes while(pos < pos_limit) { @@ -1501,9 +1493,9 @@ uint32_t fastafs::view_faidx_chunk(uint32_t padding, char *buffer, size_t buffer { std::string contents = this->get_faidx(padding); - size_t to_copy = std::min(buffer_size, contents.size() - file_offset ); + size_t to_copy = std::min(buffer_size, contents.size() - file_offset); - return (uint32_t) contents.copy(buffer, to_copy, file_offset ); + return (uint32_t) contents.copy(buffer, to_copy, file_offset); } @@ -1563,14 +1555,14 @@ int fastafs::info(bool ena_verify_checksum) std::cout << "# FASTAFS NAME: " << this->filename << "\n"; std::cout << "# FORMAT: v0-x32"; switch(this->filetype) { - case compression_type::undefined: - printf("?\n"); + case compression_type::undefined: + printf("?\n"); break; - case compression_type::uncompressed : - printf("\n"); + case compression_type::uncompressed : + printf("\n"); break; - case compression_type::zstd: - printf("+Z\n"); + case compression_type::zstd: + printf("+Z\n"); break; } printf("# SEQUENCES: %u\n", (uint32_t) this->data.size()); @@ -1693,12 +1685,12 @@ bool fastafs::check_file_integrity(bool verbose) { uint32_t crc32_current = this->get_crc32(); - char buf_old[5] = "\x00\x00\x00\x00"; + unsigned char buf_old[5] = "\x00\x00\x00\x00"; uint_to_fourbytes(buf_old, (uint32_t) this->crc32f); if(crc32_current != this->crc32f) { - char buf_new[5] = "\x00\x00\x00\x00"; + unsigned char buf_new[5] = "\x00\x00\x00\x00"; uint_to_fourbytes(buf_new, (uint32_t) crc32_current); if(verbose) { @@ -1744,6 +1736,7 @@ bool fastafs::check_sequence_integrity(bool verbose) ffs2f_init* cache = this->init_ffs2f(0, false);// do not use masking, this checksum requires capital / upper case nucleotides chunked_reader file = chunked_reader(this->filename.c_str()); + file.fopen(0); //std::ifstream 
file(this->filename.c_str(), std::ios::in | std::ios::binary | std::ios::ate); //if(file.is_open()) { for(uint32_t i = 0; i < this->data.size(); i++) { diff --git a/src/fivebit_fivebytes.cpp b/src/fivebit_fivebytes.cpp index 98f945c0..85e1de11 100644 --- a/src/fivebit_fivebytes.cpp +++ b/src/fivebit_fivebytes.cpp @@ -242,7 +242,7 @@ unsigned char fivebit_fivebytes::decompressed_to_compressed_bytes(unsigned char * >Seq * [ABCDEFGH][ABCDEFGH][ACCCAAC] has offset of 2? * */ -const off_t fivebit_fivebytes::nucleotides_to_compressed_fileoffset(size_t n_amino_acids) +off_t fivebit_fivebytes::nucleotides_to_compressed_fileoffset(size_t n_amino_acids) { off_t out = n_amino_acids / (off_t) fivebit_fivebytes::nucleotides_per_chunk; @@ -263,7 +263,7 @@ const off_t fivebit_fivebytes::nucleotides_to_compressed_fileoffset(size_t n_ami * [ABCDEFGH][ABCDEFGH][A] has offset of 11? * */ -const off_t fivebit_fivebytes::nucleotides_to_compressed_offset(size_t n_amino_acids) +off_t fivebit_fivebytes::nucleotides_to_compressed_offset(size_t n_amino_acids) { return fivebit_fivebytes::nucleotides_to_compressed_fileoffset(n_amino_acids) + fivebit_fivebytes::decompressed_to_compressed_bytes(n_amino_acids % fivebit_fivebytes::nucleotides_per_chunk); diff --git a/src/flags.cpp b/src/flags.cpp index b3e11db1..eb90e1e7 100644 --- a/src/flags.cpp +++ b/src/flags.cpp @@ -16,7 +16,7 @@ twobit_flag::twobit_flag() -void twobit_flag::set(char *data) +void twobit_flag::set(unsigned char *data) { this->bits[0] = data[0]; this->bits[1] = data[1]; @@ -37,14 +37,19 @@ bool twobit_flag::get_flag(unsigned char bit) } - -// https://www.learncpp.com/cpp-tutorial/bit-manipulation-with-bitwise-operators-and-bit-masks/ +/** + * @param bit denotes the i'th of 16 bits to set value of + * @param enable whether to enable of disable the bit + * + * more info: https://www.learncpp.com/cpp-tutorial/bit-manipulation-with-bitwise-operators-and-bit-masks/ + */ void twobit_flag::set_flag(unsigned char bit, bool enable) { 
+#if DEBUG if(bit >= 16) { throw std::runtime_error("twobit_flag::set_flag = out of bound: " + std::to_string(bit) + "\n"); } - +#endif //DEBUG if(enable) { // //this->bits[bit / 8] |= bitmasks[bit]; @@ -69,6 +74,11 @@ bool fastafs_flags::is_complete() return this->get_flag(FASTAFS_BITFLAG_COMPLETE); } +bool fastafs_flags::is_incomplete() +{ + return !this->is_complete(); +} + void fastafs_flags::set_complete() { this->set_flag(FASTAFS_BITFLAG_COMPLETE, true); @@ -85,32 +95,28 @@ void fastafs_flags::set_incomplete() // alphabet: 'ACTG' + 'N' bool fastafs_sequence_flags::is_dna() { - return ( - this->get_flag(FASTAFS_SEQUENCE_BITFLAG_SEQUENCE_TYPE_1) == false && - this->get_flag(FASTAFS_SEQUENCE_BITFLAG_SEQUENCE_TYPE_2) == false); + return (this->get_flag(FASTAFS_SEQUENCE_BITFLAG_SEQUENCE_TYPE_1) == false && + this->get_flag(FASTAFS_SEQUENCE_BITFLAG_SEQUENCE_TYPE_2) == false); } // alphabet: 'ACUG' + 'N' bool fastafs_sequence_flags::is_rna() { - return ( - this->get_flag(FASTAFS_SEQUENCE_BITFLAG_SEQUENCE_TYPE_1) == true && - this->get_flag(FASTAFS_SEQUENCE_BITFLAG_SEQUENCE_TYPE_2) == false); + return (this->get_flag(FASTAFS_SEQUENCE_BITFLAG_SEQUENCE_TYPE_1) == true && + this->get_flag(FASTAFS_SEQUENCE_BITFLAG_SEQUENCE_TYPE_2) == false); } // alphabet: 'ACGTURYKMSWBDHVN' + '-' bool fastafs_sequence_flags::is_iupec_nucleotide() { - return ( - this->get_flag(FASTAFS_SEQUENCE_BITFLAG_SEQUENCE_TYPE_1) == false && - this->get_flag(FASTAFS_SEQUENCE_BITFLAG_SEQUENCE_TYPE_2) == true); + return (this->get_flag(FASTAFS_SEQUENCE_BITFLAG_SEQUENCE_TYPE_1) == false && + this->get_flag(FASTAFS_SEQUENCE_BITFLAG_SEQUENCE_TYPE_2) == true); } bool fastafs_sequence_flags::is_protein() { - return ( - this->get_flag(FASTAFS_SEQUENCE_BITFLAG_SEQUENCE_TYPE_1) == true && - this->get_flag(FASTAFS_SEQUENCE_BITFLAG_SEQUENCE_TYPE_2) == true); + return (this->get_flag(FASTAFS_SEQUENCE_BITFLAG_SEQUENCE_TYPE_1) == true && + this->get_flag(FASTAFS_SEQUENCE_BITFLAG_SEQUENCE_TYPE_2) == true); } @@ 
-119,11 +125,30 @@ bool fastafs_sequence_flags::is_complete() return this->get_flag(FASTAFS_SEQUENCE_BITFLAG_COMPLETE); } +bool fastafs_sequence_flags::is_incomplete() +{ + return !this->is_complete(); +} + bool fastafs_sequence_flags::is_circular() { return this->get_flag(FASTAFS_SEQUENCE_BITFLAG_CIRCULAR); } +bool fastafs_sequence_flags::is_linear() +{ + return !this->is_circular(); +} + +bool fastafs_sequence_flags::is_twobit() +{ + return (this->is_dna() | this->is_rna()); +} + +bool fastafs_sequence_flags::is_fourbit() +{ + return this->is_iupec_nucleotide(); +} diff --git a/src/fourbit_byte.cpp b/src/fourbit_byte.cpp index 1f0a3f49..05568e9a 100644 --- a/src/fourbit_byte.cpp +++ b/src/fourbit_byte.cpp @@ -20,7 +20,7 @@ binary: IUPEC 11111111 NN */ -const char fourbit_byte::fourbit_alhpabet[17] = "ACGTURYKMSWBDHVN"; +//const char fourbit_byte::fourbit_alphabet[17] = "ACGTURYKMSWBDHVN"; char fourbit_byte::encode_hash[256][3] = {"AA", "AC", "AG", "AT", "AU", "AR", "AY", "AK", "AM", "AS", "AW", "AB", "AD", "AH", "AV", "AN", "CA", "CC", "CG", "CT", "CU", "CR", "CY", "CK", "CM", "CS", "CW", "CB", "CD", "CH", "CV", "CN", "GA", "GC", "GG", "GT", "GU", "GR", "GY", "GK", "GM", "GS", "GW", "GB", "GD", "GH", "GV", "GN", "TA", "TC", "TG", "TT", "TU", "TR", "TY", "TK", "TM", "TS", "TW", "TB", "TD", "TH", "TV", "TN", "UA", "UC", "UG", "UT", "UU", "UR", "UY", "UK", "UM", "US", "UW", "UB", "UD", "UH", "UV", "UN", "RA", "RC", "RG", "RT", "RU", "RR", "RY", "RK", "RM", "RS", "RW", "RB", "RD", "RH", "RV", "RN", "YA", "YC", "YG", "YT", "YU", "YR", "YY", "YK", "YM", "YS", "YW", "YB", "YD", "YH", "YV", "YN", "KA", "KC", "KG", "KT", "KU", "KR", "KY", "KK", "KM", "KS", "KW", "KB", "KD", "KH", "KV", "KN", "MA", "MC", "MG", "MT", "MU", "MR", "MY", "MK", "MM", "MS", "MW", "MB", "MD", "MH", "MV", "MN", "SA", "SC", "SG", "ST", "SU", "SR", "SY", "SK", "SM", "SS", "SW", "SB", "SD", "SH", "SV", "SN", "WA", "WC", "WG", "WT", "WU", "WR", "WY", "WK", "WM", "WS", "WW", "WB", "WD", "WH", "WV", 
"WN", "BA", "BC", "BG", "BT", "BU", "BR", "BY", "BK", "BM", "BS", "BW", "BB", "BD", "BH", "BV", "BN", "DA", "DC", "DG", "DT", "DU", "DR", "DY", "DK", "DM", "DS", "DW", "DB", "DD", "DH", "DV", "DN", "HA", "HC", "HG", "HT", "HU", "HR", "HY", "HK", "HM", "HS", "HW", "HB", "HD", "HH", "HV", "HN", "VA", "VC", "VG", "VT", "VU", "VR", "VY", "VK", "VM", "VS", "VW", "VB", "VD", "VH", "VV", "VN", "NA", "NC", "NG", "NT", "NU", "NR", "NY", "NK", "NM", "NS", "NW", "NB", "ND", "NH", "NV", "NN"}; @@ -134,7 +134,7 @@ void fourbit_byte::set(unsigned char bit_offset, unsigned char nucleotide) break; #endif //DEBUG } -}; +} // input char "AACCCTTGG" @@ -263,7 +263,7 @@ char *fourbit_byte::get() * dit is naar beneden afgerond zodat de file pointer ervoor start * * */ -const off_t fourbit_byte::nucleotides_to_compressed_fileoffset(size_t n_nucleotides) +off_t fourbit_byte::nucleotides_to_compressed_fileoffset(size_t n_nucleotides) { return (off_t) n_nucleotides / fourbit_byte::nucleotides_per_byte; } @@ -271,9 +271,3 @@ const off_t fourbit_byte::nucleotides_to_compressed_fileoffset(size_t n_nucleoti -void fourbit_byte::next(chunked_reader &r) -{ - this->data = r.read(); -} - - diff --git a/src/fuse.cpp b/src/fuse.cpp index 8490ac32..1a4dd8e8 100644 --- a/src/fuse.cpp +++ b/src/fuse.cpp @@ -116,7 +116,7 @@ static int do_getattr(const char *path, struct stat *st) st->st_nlink = 1; //@todo this needs to be defined with some api stuff:!! 
- st->st_size = (signed int) ffi->f->view_sequence_region_size(ffi->cache_p0, (strchr(path, '/') + 5)); + st->st_size = (signed int) ffi->f->view_sequence_region_size((strchr(path, '/') + 5)); } else { st->st_mode = S_IFREG | 0444; st->st_nlink = 1; @@ -168,7 +168,7 @@ static int do_getattr(const char *path, struct stat *st) -static int do_readdir(const char *path, void *buffer, fuse_fill_dir_t filler, off_t offset, struct fuse_file_info *fi) +static int do_readdir(const char *path, void *buffer, fuse_fill_dir_t filler, __attribute__((__unused__)) off_t offset, __attribute__((__unused__)) struct fuse_file_info *fi) { fuse_instance *ffi = static_cast(fuse_get_context()->private_data); @@ -248,13 +248,13 @@ static int do_open(const char *path, struct fuse_file_info *fi) ); //printf("sem init... \n"); - sem_init( &(ft->crs[ft->thread_i].sem), 0, 1 ); + sem_init(&(ft->crs[ft->thread_i].sem), 0, 1); //printf("sem init done... \n"); } ft->thread_i = 0; fi->fh = reinterpret_cast(ft); - + #if DEBUG printf("\033[0;35m fi->fh: %u\n", (unsigned int) fi->fh); printf("\033[0;35m fi->writepage: %u\n", fi->writepage); @@ -272,7 +272,7 @@ static int do_open(const char *path, struct fuse_file_info *fi) return 0; } -static int do_flush(const char *path, struct fuse_file_info *fi) +static int do_flush(const char *path, __attribute__((__unused__)) struct fuse_file_info *fi) { return 0; } @@ -287,7 +287,7 @@ static int do_release(const char *path, struct fuse_file_info *fi) sem_destroy(&ft->crs[i].sem); delete ft->crs[i].cr; } - + delete ft; } @@ -692,7 +692,7 @@ fuse_instance *parse_args(int argc, char **argv, char **argv_fuse) if(fi->from_fastafs) { std::string fname; std::string name; - + if(from_file_rather_than_from_db) { fname = std::string(argv[mount_target_arg]); //name = std::filesystem::path(fname).filename(); @@ -702,7 +702,7 @@ fuse_instance *parse_args(int argc, char **argv, char **argv_fuse) size_t lastindex = name.find_last_of("."); name = name.substr(0, lastindex); } 
else { - database d = database(); + database d = database(database::get_default_dir()); fname = d.get(argv[mount_target_arg]); if(fname.size() == 0) { // invalid mount argument, don't bind fastafs object @@ -779,7 +779,7 @@ void fuse(int argc, char *argv[]) fuse_main(ffi->argc_fuse, argv2, &operations, ffi); } //http://www.maastaar.net/fuse/linux/filesystem/c/2016/05/21/writing-a-simple-filesystem-using-fuse/ - + //return ret; } diff --git a/src/main.cpp b/src/main.cpp index a0330556..46df58e8 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -120,7 +120,7 @@ int main(int argc, char *argv[]) // reserve place in database std::string fname_out; if(to_cache) { - database d = database(); + database d = database(database::get_default_dir()); fname_out = d.add(argv[argc - 2]); } else { fname_out = std::string(argv[argc - 2]); @@ -196,7 +196,7 @@ int main(int argc, char *argv[]) if(from_file) { fname = std::string(argv[argc - 1]); } else { - database d = database(); + database d = database(database::get_default_dir()); fname = d.get(argv[argc - 1]); if(fname.size() == 0) { @@ -255,7 +255,7 @@ int main(int argc, char *argv[]) if(from_file) { fname = std::string(argv[argc - 1]); } else { - database d = database(); + database d = database(database::get_default_dir()); fname = d.get(argv[argc - 1]); if(fname.size() == 0) { @@ -274,7 +274,7 @@ int main(int argc, char *argv[]) } else if(strcmp(argv[1], "mount") == 0) { fuse(argc, argv); } else if(strcmp(argv[1], "list") == 0) { - database d = database(); + database d = database(database::get_default_dir()); d.list(); } else if(strcmp(argv[1], "ps") == 0) { std::unordered_multimap > fastafs_fuse_mounts = get_fastafs_processes(); @@ -304,7 +304,7 @@ int main(int argc, char *argv[]) if(from_file) { fname = std::string(argv[argc - 1]); } else { - database d = database(); + database d = database(database::get_default_dir()); fname = d.get(argv[argc - 1]); if(fname.size() == 0) { diff --git a/src/sequence_region.cpp 
b/src/sequence_region.cpp index 4f7a2015..6c3b9aeb 100644 --- a/src/sequence_region.cpp +++ b/src/sequence_region.cpp @@ -2,21 +2,17 @@ #include "sequence_region.hpp" - sequence_region::sequence_region(char * seqstr) : - seq_name(""), has_defined_end(false), start(0), end(0) + defined_end(false), start(0), end(0), seq_name("") { - parse((const char *) seqstr);// char* can be converted to cost char*, but not vice versa - } + sequence_region::sequence_region(const char * seqstr) : - seq_name(""), has_defined_end(false), start(0), end(0) + defined_end(false), start(0), end(0), seq_name("") { - parse(seqstr); - } @@ -63,7 +59,7 @@ void sequence_region::parse(const char * seqstr) this->start = std::stoi(start); - this->has_defined_end = true; + this->defined_end = true; this->end = this->start; } else if(p2 == (p + 1)) {// chrA:-123 std::string end = std::string(seqstr, p2 + 1, strlen(seqstr) - p2 - 1); @@ -71,13 +67,13 @@ void sequence_region::parse(const char * seqstr) this->start = 0; this->end = std::stoi(end) ; - this->has_defined_end = true; + this->defined_end = true; } else if(p2 > (p + 1)) { // chrA:123- | chrA:123-456 | chrA:123-456ERR if(p2 + 1 == strlen(seqstr)) { // chrA:123- std::string start = std::string(seqstr, p + 1, p2 - p - 1); this->start = std::stoi(start); - this->has_defined_end = false; + this->defined_end = false; } else { // chrA:123-456 | chrA:123-456ERR std::string start = std::string(seqstr, p + 1, p2 - p - 1); std::string end = std::string(seqstr, p2 + 1, strlen(seqstr) - p2 - 1); @@ -85,14 +81,20 @@ void sequence_region::parse(const char * seqstr) this->start = std::stoi(start) ; - this->has_defined_end = true; + this->defined_end = true; this->end = std::stoi(end) ; } } } - if(this->has_defined_end and this->start > this->end) { +#if DEBUG + if(this->has_defined_end() and this->get_start_position() > this->get_end_position()) { throw std::invalid_argument("Invalid region - start larger than end."); } +#endif //DEBUG } + + + + diff --git 
a/src/twobit_byte.cpp b/src/twobit_byte.cpp index 7d4bc83c..c552803d 100644 --- a/src/twobit_byte.cpp +++ b/src/twobit_byte.cpp @@ -66,7 +66,7 @@ void twobit_byte::set(unsigned char bit_offset, unsigned char nucleotide) break; #endif //DEBUG } -}; +} // input char "AACCCTTGG" @@ -145,12 +145,12 @@ char *twobit_byte::get() * >Seq * [ACTG][ACTG][AC] has offset of 2 (or 3)? * */ -const off_t twobit_byte::nucleotides_to_compressed_fileoffset(size_t n_nucleotides) +off_t twobit_byte::nucleotides_to_compressed_fileoffset(size_t n_nucleotides) { return (off_t) n_nucleotides / twobit_byte::nucleotides_per_byte; } -const off_t twobit_byte::nucleotides_to_compressed_offset(size_t n_nucleotides) +off_t twobit_byte::nucleotides_to_compressed_offset(size_t n_nucleotides) { return twobit_byte::nucleotides_to_compressed_fileoffset(n_nucleotides + twobit_byte::nucleotides_per_byte - 1); } @@ -158,10 +158,5 @@ const off_t twobit_byte::nucleotides_to_compressed_offset(size_t n_nucleotides) -// needs to be separate function because not encodings read byte-per-byte -void twobit_byte::next(chunked_reader &r) -{ - this->data = r.read(); -} diff --git a/src/ucsc2bit.cpp b/src/ucsc2bit.cpp index 922c961e..e4fbb812 100644 --- a/src/ucsc2bit.cpp +++ b/src/ucsc2bit.cpp @@ -246,7 +246,7 @@ void ucsc2bit::load(std::string afilename) file.close(); throw std::invalid_argument("Corrupt file: " + filename); } else { - char *memblock = new char [20 + 1]; // buffer + unsigned char *memblock = new unsigned char [20 + 1]; // buffer if(memblock == 0) { throw std::invalid_argument("Could not alloc\n"); } @@ -256,7 +256,7 @@ void ucsc2bit::load(std::string afilename) uint32_t i; // HEADER - if(!file.read(memblock, 16)) { + if(!file.read((char *) &memblock[0], 16)) { delete[] memblock; throw std::invalid_argument("Corrupt, unreadable or truncated file (early EOF): " + filename); } @@ -271,7 +271,7 @@ void ucsc2bit::load(std::string afilename) // check version for(i = 0 ; i < 4; i++) { - if(memblock[i+4] 
!= UCSC2BIT_VERSION[i]) { + if(memblock[i + 4] != UCSC2BIT_VERSION[i]) { delete[] memblock; throw std::invalid_argument("Corrupt 2bit file. unknown version: " + filename); } @@ -293,13 +293,14 @@ void ucsc2bit::load(std::string afilename) s = new ucsc2bit_seq; // name length - if(!file.read(memblock, 1)) { + if(!file.read((char *) &memblock[0], 1)) { delete[] memblock; throw std::invalid_argument("Corrupt, unreadable or truncated file (early EOF): " + filename); } // name - char name[memblock[0] + 1]; + //char name[memblock[0] + 1]; + char *name = new char[memblock[0] + 1]; if(!file.read(name, memblock[0])) { delete[] memblock; throw std::invalid_argument("Corrupt, unreadable or truncated file (early EOF): " + filename); @@ -309,7 +310,7 @@ void ucsc2bit::load(std::string afilename) s->name = std::string(name); // file offset for seq-block - if(!file.read(memblock, 4)) { + if(!file.read((char *) &memblock[0], 4)) { delete[] memblock; throw std::invalid_argument("Corrupt, unreadable or truncated file (early EOF): " + filename); } @@ -324,11 +325,11 @@ void ucsc2bit::load(std::string afilename) s = data[i]; file.seekg(s->data_position, std::ios::beg); - file.read(memblock, 4); + file.read((char *) &memblock[0], 4); s->n = fourbytes_to_uint_ucsc2bit(memblock, 0); // n blocks - if(!file.read(memblock, 4)) { + if(!file.read((char *) &memblock[0], 4)) { delete[] memblock; throw std::invalid_argument("Corrupt, unreadable or truncated file (early EOF): " + filename); } @@ -336,7 +337,7 @@ void ucsc2bit::load(std::string afilename) s->n_starts.resize(n_blocks); s->n_ends.resize(n_blocks); for(j = 0; j < n_blocks; j++) { - file.read(memblock, 8); + file.read((char *) &memblock[0], 8); uint32_t n_block_s = fourbytes_to_uint_ucsc2bit(memblock, 0); s->n_starts[j] = n_block_s; @@ -344,7 +345,7 @@ void ucsc2bit::load(std::string afilename) } // m blocks - if(!file.read(memblock, 4)) { + if(!file.read((char *) &memblock[0], 4)) { delete[] memblock; throw 
std::invalid_argument("Corrupt, unreadable or truncated file (early EOF): " + filename); } @@ -352,7 +353,7 @@ void ucsc2bit::load(std::string afilename) s->m_starts.resize(m_blocks); s->m_ends.resize(m_blocks); for(j = 0; j < m_blocks; j++) { - file.read(memblock, 8); + file.read((char *) &memblock[0], 8); uint32_t m_block_s = fourbytes_to_uint_ucsc2bit(memblock, 0); s->m_starts[j] = m_block_s; diff --git a/src/ucsc2bit_to_fastafs.cpp b/src/ucsc2bit_to_fastafs.cpp index abaa4822..60c7c618 100644 --- a/src/ucsc2bit_to_fastafs.cpp +++ b/src/ucsc2bit_to_fastafs.cpp @@ -26,7 +26,7 @@ size_t ucsc2bit_to_fastafs(std::string ucsc2bit_file, std::string fastafs_file) const char ng[2] = "G"; const char nn[2] = "N"; - char buffer[16 + 1]; + unsigned char buffer[16 + 1]; fastafs fs_new = fastafs(""); uint32_t i, j, n; @@ -51,7 +51,7 @@ size_t ucsc2bit_to_fastafs(std::string ucsc2bit_file, std::string fastafs_file) fh_fastafs << "\x00\x00\x00\x00"s;// position of metedata ~ unknown YET // Read UCSC2bit header (n seq) - fh_ucsc2bit.read(buffer, 12); + fh_ucsc2bit.read((char*)(&buffer[0]), 12); //conversion from unsigned char* to char* (https://stackoverflow.com/questions/604431/c-reading-unsigned-char-from-file-stream) n = fourbytes_to_uint_ucsc2bit(buffer, 8); uint_to_fourbytes(buffer, n); std::vector data(n); @@ -66,15 +66,15 @@ size_t ucsc2bit_to_fastafs(std::string ucsc2bit_file, std::string fastafs_file) data[i] = s; data2[i] = t; - fh_ucsc2bit.read(buffer, 1); + fh_ucsc2bit.read((char*)&buffer[0], 1); s->name_size = buffer[0]; - fh_ucsc2bit.read(buffer, s->name_size); + fh_ucsc2bit.read((char*)&buffer[0], s->name_size); s->name = new char[s->name_size + 1]; - strncpy(s->name, buffer, s->name_size); + strncpy(s->name, (char*)&buffer[0], s->name_size); s->name[s->name_size] = '\0'; - fh_ucsc2bit.read(buffer, 4); + fh_ucsc2bit.read((char*)&buffer[0], 4); s->offset = fourbytes_to_uint_ucsc2bit(buffer, 0); } for(i = 0 ; i < n; i ++) { @@ -84,31 +84,31 @@ size_t 
ucsc2bit_to_fastafs(std::string ucsc2bit_file, std::string fastafs_file) t = data2[i]; t->file_offset_dna_in_ucsc2bit = fh_fastafs.tellp(); - fh_ucsc2bit.read(buffer, 4); + fh_ucsc2bit.read((char*)&buffer[0], 4); s->dna_size = fourbytes_to_uint_ucsc2bit(buffer, 0); // parse N blocks - fh_ucsc2bit.read(buffer, 4); + fh_ucsc2bit.read((char*)&buffer[0], 4); s->n_blocks = fourbytes_to_uint_ucsc2bit(buffer, 0); for(j = 0; j < s->n_blocks; j++) { - fh_ucsc2bit.read(buffer, 4); + fh_ucsc2bit.read((char*)&buffer[0], 4); s->n_block_starts.push_back(fourbytes_to_uint_ucsc2bit(buffer, 0)); } for(j = 0; j < s->n_blocks; j++) { - fh_ucsc2bit.read(buffer, 4); + fh_ucsc2bit.read((char*)&buffer[0], 4); s->n_block_sizes.push_back(fourbytes_to_uint_ucsc2bit(buffer, 0)); t->N += s->n_block_sizes.back();//ucsc2bit provides lengths } // parse M blocks - fh_ucsc2bit.read(buffer, 4); + fh_ucsc2bit.read((char*)&buffer[0], 4); s->m_blocks = fourbytes_to_uint_ucsc2bit(buffer, 0); for(j = 0; j < s->m_blocks; j++) { - fh_ucsc2bit.read(buffer, 4); + fh_ucsc2bit.read((char*)&buffer[0], 4); s->m_block_starts.push_back(fourbytes_to_uint_ucsc2bit(buffer, 0)); } for(j = 0; j < s->m_blocks; j++) { - fh_ucsc2bit.read(buffer, 4); + fh_ucsc2bit.read((char*)&buffer[0], 4); s->m_block_sizes.push_back(fourbytes_to_uint_ucsc2bit(buffer, 0)); } @@ -117,7 +117,7 @@ size_t ucsc2bit_to_fastafs(std::string ucsc2bit_file, std::string fastafs_file) fh_fastafs.write(reinterpret_cast(&buffer), (size_t) 4); // parse and convert sequence - fh_ucsc2bit.read(buffer, 4); + fh_ucsc2bit.read((char*)&buffer[0], 4); twobit_byte t_in = twobit_byte(ENCODE_HASH_TWOBIT_DNA); const char *decoded_in = t_in.encode_hash[0];// unnecessary initialization but otherwise gcc whines twobit_byte t_out = twobit_byte(ENCODE_HASH_TWOBIT_DNA); @@ -133,7 +133,7 @@ size_t ucsc2bit_to_fastafs(std::string ucsc2bit_file, std::string fastafs_file) } for(j = 0; j < s->dna_size; j++) { if(j % 4 == 0) { - fh_ucsc2bit.read(buffer, 1); + 
fh_ucsc2bit.read((char*)&buffer[0], 1); t_in.data = buffer[0]; decoded_in = t_in.get();// pointer to the right value? } @@ -272,7 +272,7 @@ size_t ucsc2bit_to_fastafs(std::string ucsc2bit_file, std::string fastafs_file) f.load(fastafs_file); uint32_t crc32c = f.get_crc32(); - char byte_enc[5] = "\x00\x00\x00\x00"; + unsigned char byte_enc[5] = "\x00\x00\x00\x00"; uint_to_fourbytes(byte_enc, (uint32_t) crc32c); //printf("[%i][%i][%i][%i] input!! \n", byte_enc[0], byte_enc[1], byte_enc[2], byte_enc[3]); fh_fastafs.write(reinterpret_cast(&byte_enc), (size_t) 4); diff --git a/src/utils.cpp b/src/utils.cpp index 390a9841..c85ac611 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -14,7 +14,7 @@ #include "config.hpp" // as these chars are coming from ifstream.read -uint32_t fourbytes_to_uint(char *chars, unsigned char offset) +uint32_t fourbytes_to_uint(unsigned char *chars, unsigned char offset) { uint32_t u = ((unsigned char) chars[0 + offset] << 24) | ((unsigned char) chars[1 + offset] << 16) | ((unsigned char) chars[2 + offset] << 8) | ((unsigned char) chars[3 + offset]); return u; @@ -36,7 +36,7 @@ the equivalent of 129 in their encoding is as follows: The function below decodes these 4 charred strings into an uint32_teger */ -uint32_t fourbytes_to_uint_ucsc2bit(char *chars, unsigned char offset) +uint32_t fourbytes_to_uint_ucsc2bit(unsigned char *chars, unsigned char offset) { uint32_t u = ((unsigned char) chars[3 + offset] << 24) | ((unsigned char) chars[2 + offset] << 16) | ((unsigned char) chars[1 + offset] << 8) | ((unsigned char) chars[0 + offset]); @@ -84,7 +84,7 @@ size_t remove_chars(char *s, int c, size_t l) } -void uint_to_fourbytes(char *chars, uint32_t n) +void uint_to_fourbytes(unsigned char *chars, uint32_t n) { chars[0] = (signed char)((n >> 24) & 0xFF); chars[1] = (signed char)((n >> 16) & 0xFF); @@ -207,15 +207,15 @@ bool is_ucsc2bit_file(char *filename) if(fread(buf, 1, 4, fp) == 4) { fclose(fp); - - + + return UCSC2BIT_MAGIC.compare(0, 4, buf) == 
0; //return ( - //buf[0] == UCSC2BIT_MAGIC[0] and - //buf[1] == UCSC2BIT_MAGIC[1] and - //buf[2] == UCSC2BIT_MAGIC[2] and - //buf[3] == UCSC2BIT_MAGIC[3] - //);// return true if first byte equals > + //buf[0] == UCSC2BIT_MAGIC[0] and + //buf[1] == UCSC2BIT_MAGIC[1] and + //buf[2] == UCSC2BIT_MAGIC[2] and + //buf[3] == UCSC2BIT_MAGIC[3] + //);// return true if first byte equals > } else { fclose(fp); @@ -330,8 +330,8 @@ bool file_exist(const char *fileName) //moe classical but slower implementation //std::ifstream infile(fileName); //return infile.good(); - + //following implementation should be faster struct stat buffer; - return (stat (fileName, &buffer) == 0); + return (stat(fileName, &buffer) == 0); } diff --git a/src/xbit_byte_encoder.cpp b/src/xbit_byte_encoder.cpp new file mode 100644 index 00000000..f1a2c825 --- /dev/null +++ b/src/xbit_byte_encoder.cpp @@ -0,0 +1,18 @@ +#include +#include + +#include "config.hpp" + +#include "xbit_byte_encoder.hpp" + + + +void xbit_byte_encoder::next(chunked_reader &r) +{ + unsigned char *buf = new unsigned char[2]; + r.read(buf, 1); + this->data = buf[0]; + + delete[] buf; +} + diff --git a/test/cache/test_cache.cpp b/test/cache/test_cache.cpp index 1d5b5086..b1acee0b 100644 --- a/test/cache/test_cache.cpp +++ b/test/cache/test_cache.cpp @@ -231,136 +231,142 @@ BOOST_AUTO_TEST_CASE(Test_size) */ BOOST_AUTO_TEST_CASE(test_cache) { - size_t written = fasta_to_fastafs("test/data/test.fa", "tmp/test_cache_test.fastafs", false); - - static std::string reference = - // GENERIC-HEADER - "\x0F\x0A\x46\x53"s// [0, 3] - "\x00\x00\x00\x00"s// [4, 7] version - "\x80\x00"s// [8, 9] FASTAFS flag [ 10000000 | 00000000 ] - "\x00\x00\x01\x37"s // [10, 13] index position in file (153) - - // DATA - "\x00\x00\x00\x10"s// [14, 17] seq length (16) (of 2bit encoded bytes; n-blocks are excluded) - "\x00\x55\xAA\xFF"s// [18, 21] sequence - "\x00\x00\x00\x00"s// [22, 25] n-blocks (0) - 
"\x75\x25\x5C\x6D\x90\x77\x89\x99\xAD\x36\x43\xA2\xE6\x9D\x43\x44"s// [26, 41] checksum - "\x00\x00\x00\x01"s// [42, ] m-blocks (1) - "\x00\x00\x00\x00"s// [50, 53] m-block starts (0) - "\x00\x00\x00\x0F"s// [54, 57] m-block starts (15) - "\x00\x00\x00\x0C"s// [58, 61] seq length (12) (of 2bit encoded bytes; n-blocks are excluded) - "\x93\x93\x93"s// [62, 64] sequence: ACTG ACTG nnnn ACTG = 10010011 10010011 00000000 10010011 = \x93 \x93 \x00 \x93 - "\x00\x00\x00\x01"s// [65, 68] n-blocks (1) - "\x00\x00\x00\x08"s// [69, 72] n-block start[1] (08) - "\x00\x00\x00\x0B"s// [73, 76] n-block ends[1] (11) - "\x8B\x56\x73\x72\x4A\x99\x65\xC2\x9A\x1D\x76\xFE\x70\x31\xAC\x8A"s// [69, 96] checksum - "\x00\x00\x00\x01"s// [97, 100] m-blocks (0) - "\x00\x00\x00\x08"s// [101, 104] m-block starts (8) - "\x00\x00\x00\x0B"s// [105, 108] m-block starts (11) - "\x00\x00\x00\x0D"s// [109, 112] seq length (13) (needs to become 2bit-encoded seq-len) - "\x93\x93\xAA\x40"s// [113, 116] sequence: last one is 01 00 00 00 - "\x00\x00\x00\x00"s// [117, 120] n-blocks (0) - "\x61\xDE\xBA\x32\xEC\x4C\x35\x76\xE3\x99\x8F\xA2\xD4\xB8\x72\x88"s// [121, 140] checksum - "\x00\x00\x00\x01"s// [141, 144] m-blocks (0) - "\x00\x00\x00\x08"s// [145, 148] m-block starts (8) - "\x00\x00\x00\x0C"s// [149, 152] m-block starts (12) - "\x00\x00\x00\x0E"s// [153, 156] seq length (14) (of 2bit encoded bytes; n-blocks are excluded) - "\x93\x93\xAA\x50"s// [157, 160] last one is 01 01 00 00 - "\x00\x00\x00\x00"s// [161, 164] n-bocks (0) - "\x99\xB9\x05\x60\xF2\x3C\x1B\xDA\x28\x71\xA6\xC9\x3F\xD6\xA2\x40"s// [165, 184] checksum - "\x00\x00\x00\x01"s// [185, 188] m-blocks (0) - "\x00\x00\x00\x08"s// [189, 192] m-block starts (8) - "\x00\x00\x00\x0D"s// [193, 196] m-block starts (13) - "\x00\x00\x00\x0F"s// [197, 200] seq length (15) (of 2bit encoded nucleotides; n-blocks are excluded) - "\x93\x93\xAA\x54"s// [201, 204] last one is 01 01 01 00 - "\x00\x00\x00\x00"s// [205, 208] n-blocks (0) - 
"\x36\x25\xAF\xDF\xBE\xB4\x37\x65\xB8\x5F\x61\x2E\x0A\xCB\x47\x39"s// [209, 228] checksum - "\x00\x00\x00\x01"s// [229, 232] m-blocks (0) - "\x00\x00\x00\x08"s// [233, 236] m-block starts (8) - "\x00\x00\x00\x0E"s// [237, 240] m-block starts (14) - "\x00\x00\x00\x04"s// [241, 244] seq length (4) (of 2bit encoded nucleotides; n-blocks are excluded) - "\x93"s// [245, 245] sequence: ACTG NNNN = 10010011 00000000 - "\x00\x00\x00\x01"s// [246, 249] n-blocks (1) - "\x00\x00\x00\x04"s// [250, 253] n-starts [1] (4) - "\x00\x00\x00\x07"s// [254, 257] n-ends [1] (7) - "\xBD\x8C\x08\x0E\xD2\x5B\xA8\xA4\x54\xD9\x43\x4C\xB8\xD1\x4A\x68"s// [258, 277] checksum - "\x00\x00\x00\x01"s// [278, 281] m-blocks (0) - "\x00\x00\x00\x04"s// [282, 285] m-block starts (4) - "\x00\x00\x00\x07"s// [286, 289] m-block starts (7) - "\x00\x00\x00\x04"s// [290, 293] seq length (4) (of 2bit encoded nucleotides; n-blocks are excluded) - "\x93"s// [294, 294] sequence: NNAC TG?? = 00001001 00110000 - "\x00\x00\x00\x01"s// [295, 298] n-blocks (1) - "\x00\x00\x00\x00"s// [299, 302] n-starts[1] (0) - "\x00\x00\x00\x01"s// [303, 306] n-ends[1] (1) - "\x98\x0E\xF3\xA1\xCD\x80\xAF\xEC\x95\x9D\xCF\x85\x2D\x02\x62\x46"s// [307, 326] checksum - "\x00\x00\x00\x01"s// [327, 330] m-blocks (0) - "\x00\x00\x00\x00"s// [331, 334] m-block starts (0) - "\x00\x00\x00\x01"s// [335, 338] m-block starts (1) - - // INDEX - "\x00\x00\x00\x07"s // [339, 342] 7 sequences - "\x010\x00" // [343, 344] complete, DNA and not circular - "\x04"s "chr1"s // [345, 349] name - "\x00\x00\x00\x0E"s // [350, 353] data position in file (14) - "\x010\x00" // [354, 355] complete, DNA and not circular - "\x04"s "chr2"s // [356, 360] name - "\x00\x00\x00\x36"s // [361, 364] data position in file (54) - "\x010\x00" // [, ] complete, DNA and not circular - "\x06"s "chr3.1"s // [, ] name - "\x00\x00\x00\x65"s // [, ] data position in file (101) - "\x010\x00" // [, ] complete, DNA and not circular - "\x06"s "chr3.2"s // [, ] name - 
"\x00\x00\x00\x8D"s // [, ] data position in file (141) - "\x010\x00" // [, ] complete, DNA and not circular - "\x06"s "chr3.3"s // [, ] name - "\x00\x00\x00\xB5"s // [, ] data position in file (181) - "\x010\x00" // [, ] complete, DNA and not circular - "\x04"s "chr4"s // [, ] name - "\x00\x00\x00\xDD"s // [, ] data position in file (221) - "\x010\x00" // [, ] complete, DNA and not circular - "\x04"s "chr5"s // [, ] name - "\x00\x00\x01\x0A"s // [, ] data position in file (290) - - // METADATA - "\x00"s // [399] no metadata fields [padding will come soon?] - - // CRC32 checksums - "\x1e\x77\x77\x22"s - ; - - BOOST_CHECK_EQUAL(written, 403); - - //BOOST_CHECK(output.compare(uppercase) == 0 or output.compare(mixedcase) == 0); - std::ifstream file("tmp/test_cache_test.fastafs", std::ios::in | std::ios::binary | std::ios::ate); - BOOST_REQUIRE(file.is_open()); - - std::streampos size; - char * buffer; - size = file.tellg(); - buffer = new char [size]; - - file.seekg(0, std::ios::beg); - file.read(buffer, size); - file.close(); - for(unsigned int i = 0; i < size; i++) { - BOOST_CHECK_EQUAL(buffer[i], reference[i]); + { + size_t written = fasta_to_fastafs("test/data/test.fa", "tmp/test_cache_test.fastafs", false); + BOOST_CHECK_EQUAL(written, 403); + + static std::string reference = + // GENERIC-HEADER + "\x0F\x0A\x46\x53"s// [0, 3] + "\x00\x00\x00\x00"s// [4, 7] version + "\x80\x00"s// [8, 9] FASTAFS flag [ 10000000 | 00000000 ] + "\x00\x00\x01\x37"s // [10, 13] index position in file (153) + + // DATA + "\x00\x00\x00\x10"s// [14, 17] seq length (16) (of 2bit encoded bytes; n-blocks are excluded) + "\x00\x55\xAA\xFF"s// [18, 21] sequence + "\x00\x00\x00\x00"s// [22, 25] n-blocks (0) + "\x75\x25\x5C\x6D\x90\x77\x89\x99\xAD\x36\x43\xA2\xE6\x9D\x43\x44"s// [26, 41] checksum + "\x00\x00\x00\x01"s// [42, ] m-blocks (1) + "\x00\x00\x00\x00"s// [50, 53] m-block starts (0) + "\x00\x00\x00\x0F"s// [54, 57] m-block starts (15) + "\x00\x00\x00\x0C"s// [58, 61] seq length (12) (of 
2bit encoded bytes; n-blocks are excluded) + "\x93\x93\x93"s// [62, 64] sequence: ACTG ACTG nnnn ACTG = 10010011 10010011 00000000 10010011 = \x93 \x93 \x00 \x93 + "\x00\x00\x00\x01"s// [65, 68] n-blocks (1) + "\x00\x00\x00\x08"s// [69, 72] n-block start[1] (08) + "\x00\x00\x00\x0B"s// [73, 76] n-block ends[1] (11) + "\x8B\x56\x73\x72\x4A\x99\x65\xC2\x9A\x1D\x76\xFE\x70\x31\xAC\x8A"s// [69, 96] checksum + "\x00\x00\x00\x01"s// [97, 100] m-blocks (0) + "\x00\x00\x00\x08"s// [101, 104] m-block starts (8) + "\x00\x00\x00\x0B"s// [105, 108] m-block starts (11) + "\x00\x00\x00\x0D"s// [109, 112] seq length (13) (needs to become 2bit-encoded seq-len) + "\x93\x93\xAA\x40"s// [113, 116] sequence: last one is 01 00 00 00 + "\x00\x00\x00\x00"s// [117, 120] n-blocks (0) + "\x61\xDE\xBA\x32\xEC\x4C\x35\x76\xE3\x99\x8F\xA2\xD4\xB8\x72\x88"s// [121, 140] checksum + "\x00\x00\x00\x01"s// [141, 144] m-blocks (0) + "\x00\x00\x00\x08"s// [145, 148] m-block starts (8) + "\x00\x00\x00\x0C"s// [149, 152] m-block starts (12) + "\x00\x00\x00\x0E"s// [153, 156] seq length (14) (of 2bit encoded bytes; n-blocks are excluded) + "\x93\x93\xAA\x50"s// [157, 160] last one is 01 01 00 00 + "\x00\x00\x00\x00"s// [161, 164] n-bocks (0) + "\x99\xB9\x05\x60\xF2\x3C\x1B\xDA\x28\x71\xA6\xC9\x3F\xD6\xA2\x40"s// [165, 184] checksum + "\x00\x00\x00\x01"s// [185, 188] m-blocks (0) + "\x00\x00\x00\x08"s// [189, 192] m-block starts (8) + "\x00\x00\x00\x0D"s// [193, 196] m-block starts (13) + "\x00\x00\x00\x0F"s// [197, 200] seq length (15) (of 2bit encoded nucleotides; n-blocks are excluded) + "\x93\x93\xAA\x54"s// [201, 204] last one is 01 01 01 00 + "\x00\x00\x00\x00"s// [205, 208] n-blocks (0) + "\x36\x25\xAF\xDF\xBE\xB4\x37\x65\xB8\x5F\x61\x2E\x0A\xCB\x47\x39"s// [209, 228] checksum + "\x00\x00\x00\x01"s// [229, 232] m-blocks (0) + "\x00\x00\x00\x08"s// [233, 236] m-block starts (8) + "\x00\x00\x00\x0E"s// [237, 240] m-block starts (14) + "\x00\x00\x00\x04"s// [241, 244] seq length (4) (of 2bit encoded 
nucleotides; n-blocks are excluded) + "\x93"s// [245, 245] sequence: ACTG NNNN = 10010011 00000000 + "\x00\x00\x00\x01"s// [246, 249] n-blocks (1) + "\x00\x00\x00\x04"s// [250, 253] n-starts [1] (4) + "\x00\x00\x00\x07"s// [254, 257] n-ends [1] (7) + "\xBD\x8C\x08\x0E\xD2\x5B\xA8\xA4\x54\xD9\x43\x4C\xB8\xD1\x4A\x68"s// [258, 277] checksum + "\x00\x00\x00\x01"s// [278, 281] m-blocks (0) + "\x00\x00\x00\x04"s// [282, 285] m-block starts (4) + "\x00\x00\x00\x07"s// [286, 289] m-block starts (7) + "\x00\x00\x00\x04"s// [290, 293] seq length (4) (of 2bit encoded nucleotides; n-blocks are excluded) + "\x93"s// [294, 294] sequence: NNAC TG?? = 00001001 00110000 + "\x00\x00\x00\x01"s// [295, 298] n-blocks (1) + "\x00\x00\x00\x00"s// [299, 302] n-starts[1] (0) + "\x00\x00\x00\x01"s// [303, 306] n-ends[1] (1) + "\x98\x0E\xF3\xA1\xCD\x80\xAF\xEC\x95\x9D\xCF\x85\x2D\x02\x62\x46"s// [307, 326] checksum + "\x00\x00\x00\x01"s// [327, 330] m-blocks (0) + "\x00\x00\x00\x00"s// [331, 334] m-block starts (0) + "\x00\x00\x00\x01"s// [335, 338] m-block starts (1) + + // INDEX + "\x00\x00\x00\x07"s // [339, 342] 7 sequences + "\x010\x00" // [343, 344] complete, DNA and not circular + "\x04"s "chr1"s // [345, 349] name + "\x00\x00\x00\x0E"s // [350, 353] data position in file (14) + "\x010\x00" // [354, 355] complete, DNA and not circular + "\x04"s "chr2"s // [356, 360] name + "\x00\x00\x00\x36"s // [361, 364] data position in file (54) + "\x010\x00" // [, ] complete, DNA and not circular + "\x06"s "chr3.1"s // [, ] name + "\x00\x00\x00\x65"s // [, ] data position in file (101) + "\x010\x00" // [, ] complete, DNA and not circular + "\x06"s "chr3.2"s // [, ] name + "\x00\x00\x00\x8D"s // [, ] data position in file (141) + "\x010\x00" // [, ] complete, DNA and not circular + "\x06"s "chr3.3"s // [, ] name + "\x00\x00\x00\xB5"s // [, ] data position in file (181) + "\x010\x00" // [, ] complete, DNA and not circular + "\x04"s "chr4"s // [, ] name + "\x00\x00\x00\xDD"s // [, ] data position 
in file (221) + "\x010\x00" // [, ] complete, DNA and not circular + "\x04"s "chr5"s // [, ] name + "\x00\x00\x01\x0A"s // [, ] data position in file (290) + + // METADATA + "\x00"s // [399] no metadata fields [padding will come soon?] + + // CRC32 checksums + "\x1e\x77\x77\x22"s + ; + + BOOST_REQUIRE_EQUAL(reference.length(), 403); + + //BOOST_CHECK(output.compare(uppercase) == 0 or output.compare(mixedcase) == 0); + std::ifstream file("tmp/test_cache_test.fastafs", std::ios::in | std::ios::binary | std::ios::ate); + BOOST_REQUIRE(file.is_open()); + + std::streampos size = file.tellg(); + char *buffer = new char[size]; + BOOST_REQUIRE(buffer != nullptr); + + file.seekg(0, std::ios::beg); + file.read(buffer, size); + file.close(); + for(unsigned int i = 0; i < size; i++) { + BOOST_CHECK_EQUAL(buffer[i], reference[i]); + /* + printf("comparing char %i\n", i); + if(reference[i] != buffer[i]) { + printf(" ** mismatch [%d] [ref] %d != [buf] %d (%c x %02hhX)\n", i, reference[i], buffer[i], buffer[i], buffer[i]); + } + */ - /* - printf("comparing char %i\n", i); - if(reference[i] != buffer[i]) { - printf(" ** mismatch [%d] [ref] %d != [buf] %d (%c x %02hhX)\n", i, reference[i], buffer[i], buffer[i], buffer[i]); - } - */ + } + delete[] buffer; } - delete[] buffer; - - - // check computed file size - fastafs f = fastafs(""); - f.load("tmp/test_cache_test.fastafs"); - BOOST_CHECK_EQUAL(f.fastafs_filesize(), 403); + { + // check computed file size + printf("test0\n"); + fastafs f = fastafs(""); + printf("test1\n"); + f.load("tmp/test_cache_test.fastafs"); + printf("test2\n"); + BOOST_CHECK_EQUAL(f.fastafs_filesize(), 403); + printf("test3\n"); + } } @@ -373,6 +379,7 @@ BOOST_AUTO_TEST_CASE(test_cache) */ BOOST_AUTO_TEST_CASE(test_cache_forwards_backwards) { + printf("test4\n"); // generate FASTAFS file from FASTA file fasta_to_fastafs("test/data/test.fa", "tmp/test_cache_test.fastafs", false); @@ -1048,7 +1055,7 @@ BOOST_AUTO_TEST_CASE(test_cache_protein2) 
BOOST_CHECK_EQUAL(buffer[i], reference[i]); if(reference[i] != buffer[i]) { - printf("comparing char %u ** mismatch [ref] %d %02hhX != [buf] (%u x %02hhX)\n", i, reference[i], reference[i], buffer[i], (unsigned char) buffer[i], buffer[i]); + printf("comparing char %u ** mismatch [ref] %d %02hhX != [buf] (%u x %02hhX)\n", i, reference[i], reference[i], buffer[i], (unsigned char) buffer[i]); } } diff --git a/test/chunked_reader/test_chunked_reader.cpp b/test/chunked_reader/test_chunked_reader.cpp index 6a0a7e97..db8b11be 100644 --- a/test/chunked_reader/test_chunked_reader.cpp +++ b/test/chunked_reader/test_chunked_reader.cpp @@ -1,5 +1,5 @@ -#define BOOST_TEST_MODULE fastfs_test_chunked_reader +#define BOOST_TEST_MODULE fastfs_test_chunked_reader_old #include @@ -17,7 +17,7 @@ -void flush_buffer(char *buffer, size_t n, char fill) +void flush_buffer(unsigned char *buffer, size_t n, unsigned char fill) { for(size_t i = 0; i < n; i++) { buffer[i] = fill; @@ -30,7 +30,7 @@ BOOST_AUTO_TEST_SUITE(Testing) -BOOST_AUTO_TEST_CASE(test_chunked_reading_small_file) +BOOST_AUTO_TEST_CASE(test_chunked_reader_old__small_file) { std::string test_name = "test"; std::string fasta_file = "test/data/" + test_name + ".fa"; @@ -44,7 +44,7 @@ BOOST_AUTO_TEST_CASE(test_chunked_reading_small_file) (unsigned) ZSTD_SEEKABLE_FRAME_SIZE); - char buffer[READ_BUFFER_SIZE + 1]; + unsigned char buffer[READ_BUFFER_SIZE + 1]; flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); std::string std_buffer; buffer[1024] = '\0'; @@ -55,92 +55,316 @@ BOOST_AUTO_TEST_CASE(test_chunked_reading_small_file) std::string reference3 = "\x0a\x46\x53\x00"s; { - chunked_reader r_flat = chunked_reader(fastafs_file.c_str()); - written = r_flat.read(buffer, 1024); - BOOST_CHECK_EQUAL(written, 403); - std_buffer = std::string(buffer, written); - BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference1), 0, "Difference in content"); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - - written = r_flat.read(buffer, 1024); - 
BOOST_CHECK_EQUAL(written, 0); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - - // test what happens when file is closed - written = r_flat.read(buffer, 1024); - BOOST_CHECK_EQUAL(written, 0); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - - - // test seek stuff - r_flat.seek(0); // reset to first pos in file - BOOST_CHECK_EQUAL(r_flat.tell(), 0); - - written = r_flat.read(buffer, 4); - BOOST_CHECK_EQUAL(written, 4); - BOOST_CHECK_EQUAL(r_flat.tell(), 4); - std_buffer = std::string(buffer, written); - BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference2), 0, "Difference in content"); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - - - r_flat.seek(1); // reset to first pos in file - BOOST_CHECK_EQUAL(r_flat.tell(), 1); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - - written = r_flat.read(buffer, 4); - BOOST_CHECK_EQUAL(written, 4); - BOOST_CHECK_EQUAL(r_flat.tell(), 5); - std_buffer = std::string(buffer, written); - BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference3), 0, "Difference in content"); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + // Context equivalent - uncompressed + chunked_reader c1(fastafs_file.c_str()); + c1.fopen(0); + BOOST_CHECK(c1.typeid_state() == typeid(ContextUncompressed)); + BOOST_CHECK(c1.typeid_state() != typeid(ContextZstdSeekable)); + + // Context equivalent - compressed + chunked_reader c2(fastafs_file_zstd.c_str()); + c2.fopen(0); + BOOST_CHECK(c2.typeid_state() == typeid(ContextZstdSeekable)); + BOOST_CHECK(c2.typeid_state() != typeid(ContextUncompressed)); + + + // Context equivalent - uncompressed + { + BOOST_CHECK_EQUAL(c1.tell(), 0); + written = c1.read(buffer, 1024); + BOOST_CHECK_EQUAL(written, 403); + std_buffer = std::string(reinterpret_cast(&buffer), written); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference1), 0, "Difference in content"); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + BOOST_CHECK_EQUAL(c1.tell(), 403); + + BOOST_CHECK(c1.typeid_state() == 
typeid(ContextUncompressed)); + BOOST_CHECK(c1.typeid_state() != typeid(ContextZstdSeekable)); + } + + // Context equivalent - compressed zstd + { + BOOST_CHECK_EQUAL(c2.tell(), 0); + + written = c2.read(buffer, 1024); + BOOST_CHECK_EQUAL(written, 403); + + std_buffer = std::string(reinterpret_cast(&buffer), written); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference1), 0, "Difference in content"); + + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + BOOST_CHECK_EQUAL(c2.tell(), 403); + + BOOST_CHECK(c2.typeid_state() != typeid(ContextUncompressed)); + BOOST_CHECK(c2.typeid_state() == typeid(ContextZstdSeekable)); + } + + + + // Context equivalent - uncompressed + { + written = c1.read(buffer, 1024); + BOOST_CHECK_EQUAL(written, 0); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + BOOST_CHECK_EQUAL(c1.tell(), 403); + + written = c1.read(buffer, 1024); + BOOST_CHECK_EQUAL(written, 0); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + BOOST_CHECK_EQUAL(c1.tell(), 403); + } + + // Context equivalent - compressed zstd + { + written = c2.read(buffer, 1024); + BOOST_CHECK_EQUAL(written, 0); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + BOOST_CHECK_EQUAL(c2.tell(), 403); + + written = c2.read(buffer, 1024); + BOOST_CHECK_EQUAL(written, 0); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + BOOST_CHECK_EQUAL(c2.tell(), 403); + } + + + { + // Context equivalent - uncompressed + BOOST_CHECK_EQUAL(c1.tell(), 403); + c1.seek(0); + BOOST_CHECK_EQUAL(c1.tell(), 0); + c1.seek(1); + BOOST_CHECK_EQUAL(c1.tell(), 1); + c1.seek(402); + BOOST_CHECK_EQUAL(c1.tell(), 402); + } + { + // Context equivalent - compressed zstd + BOOST_CHECK_EQUAL(c2.tell(), 403); + c2.seek(0); + BOOST_CHECK_EQUAL(c2.tell(), 0); + c2.seek(1); + BOOST_CHECK_EQUAL(c2.tell(), 1); + c2.seek(402); + BOOST_CHECK_EQUAL(c2.tell(), 402); + } + + + { + // Context equivalent - uncompressed + c1.seek(0); + BOOST_CHECK_EQUAL(c1.tell(), 0); + written = c1.read(buffer, 4); + 
BOOST_CHECK_EQUAL(written, 4); + + BOOST_CHECK_EQUAL(c1.tell(), 4); + std_buffer = std::string(reinterpret_cast(&buffer), written); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference2), 0, "Difference in content"); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } + { + // Context equivalent - compressed zstd + c2.seek(0); + BOOST_CHECK_EQUAL(c2.tell(), 0); + written = c2.read(buffer, 4); + BOOST_CHECK_EQUAL(written, 4); + + BOOST_CHECK_EQUAL(c2.tell(), 4); + std_buffer = std::string(reinterpret_cast(&buffer), written); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference2), 0, "Difference in content"); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } + + + + + { + // Context equivalent - uncompressed + c1.seek(1); + BOOST_CHECK_EQUAL(c1.tell(), 1); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + written = c1.read(buffer, 4); + BOOST_CHECK_EQUAL(written, 4); + BOOST_CHECK_EQUAL(c1.tell(), 5); + std_buffer = std::string(reinterpret_cast(&buffer), written); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference3), 0, "Difference in content"); + if(std_buffer.compare(reference3) != 0) { + printf("%u != %u\n", (unsigned int) reference3.size(), (unsigned int) std_buffer.size()); + printf("%s != %s\n", reference3.c_str(), std_buffer.c_str()); + + printf("[%u][%u][%u][%u]\n", (unsigned char) reference3[0], reference3[1], reference3[2], reference3[3]); + printf("[%u][%u][%u][%u]\n", (unsigned char) buffer[0], buffer[1], buffer[2], buffer[3]); + } + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } + { + // Context equivalent - compressed zstd + c2.seek(1); + BOOST_CHECK_EQUAL(c2.tell(), 1); + + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + written = c2.read(buffer, 4); + BOOST_CHECK_EQUAL(written, 4); + BOOST_CHECK_EQUAL(c2.tell(), 5); + std_buffer = std::string(reinterpret_cast(&buffer), written); + BOOST_CHECK_EQUAL(std_buffer.size(), reference3.size()); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference3), 0, "Difference in 
content"); + if(std_buffer.compare(reference3) != 0) { + printf("%u != %u\n", (unsigned int) reference3.size(), (unsigned int) std_buffer.size()); + printf("%s != %s\n", reference3.c_str(), std_buffer.c_str()); + + printf("[%u][%u][%u][%u]\n", (unsigned char) reference3[0], reference3[1], reference3[2], reference3[3]); + printf("[%u][%u][%u][%u]\n", (unsigned char) buffer[0], buffer[1], buffer[2], buffer[3]); + } + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } + + + { + // Context equivalent - uncompressed + c1.seek(1024 * 1024); + } + { + // Context equivalent - compressed zstd + c2.seek(1024 * 1024); + } } { - chunked_reader r_zstd = chunked_reader(fastafs_file_zstd.c_str()); - - written = r_zstd.read(buffer, 1024); - BOOST_CHECK_EQUAL(written, 403); - std_buffer = std::string(buffer, written); - BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference1), 0, "Difference in content"); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - - written = r_zstd.read(buffer, 1024); - BOOST_CHECK_EQUAL(written, 0); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + // Context equivalent - uncompressed + chunked_reader c1(fastafs_file.c_str()); + c1.fopen(0); + BOOST_CHECK(c1.typeid_state() == typeid(ContextUncompressed)); + BOOST_CHECK(c1.typeid_state() != typeid(ContextZstdSeekable)); + + // Context equivalent - compressed + chunked_reader c2(fastafs_file_zstd.c_str()); + c2.fopen(0); + BOOST_CHECK(c2.typeid_state() == typeid(ContextZstdSeekable)); + BOOST_CHECK(c2.typeid_state() != typeid(ContextUncompressed)); + + + + { + // C1 + written = c1.read(buffer, 1024); + BOOST_CHECK_EQUAL(written, 403); + std_buffer = std::string(reinterpret_cast(&buffer), written); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference1), 0, "Difference in content"); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } + { + // C2 + written = c2.read(buffer, 1024); + BOOST_CHECK_EQUAL(written, 403); + std_buffer = std::string(reinterpret_cast(&buffer), written); + 
BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference1), 0, "Difference in content"); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } + + + + { + // C1 + written = c1.read(buffer, 1024); + BOOST_CHECK_EQUAL(written, 0); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } + { + // C2 + written = c2.read(buffer, 1024); + BOOST_CHECK_EQUAL(written, 0); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } // test what happens when file is closed - written = r_zstd.read(buffer, 1024); - BOOST_CHECK_EQUAL(written, 0); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + { + // C1 + written = c1.read(buffer, 1024); + BOOST_CHECK_EQUAL(written, 0); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } + { + // C2 + written = c2.read(buffer, 1024); + BOOST_CHECK_EQUAL(written, 0); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } + // test seek stuff - r_zstd.seek(0); // reset to first pos in file - BOOST_CHECK_EQUAL(r_zstd.tell(), 0); - - written = r_zstd.read(buffer, 4); - BOOST_CHECK_EQUAL(written, 4); - BOOST_CHECK_EQUAL(r_zstd.tell(), 4); - std_buffer = std::string(buffer, written); - BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference2), 0, "Difference in content"); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - - - r_zstd.seek(1); // reset to first pos in file - BOOST_CHECK_EQUAL(r_zstd.tell(), 1); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - - written = r_zstd.read(buffer, 4); - BOOST_CHECK_EQUAL(written, 4); - BOOST_CHECK_EQUAL(r_zstd.tell(), 5); - std_buffer = std::string(buffer, written); - BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference3), 0, "Difference in content"); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + { + // C1 + c1.seek(0); // reset to first pos in file + BOOST_REQUIRE_EQUAL(c1.tell(), 0); + } + { + // C2 + c2.seek(0); // reset to first pos in file + BOOST_REQUIRE_EQUAL(c2.tell(), 0); + } + + + + { + // C1 + written = c1.read(buffer, 4); + BOOST_CHECK_EQUAL(written, 4); + 
BOOST_CHECK_EQUAL(c1.tell(), 4); + std_buffer = std::string(reinterpret_cast(&buffer), written); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference2), 0, "Difference in content"); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } + { + // C2 + written = c2.read(buffer, 4); + BOOST_CHECK_EQUAL(written, 4); + BOOST_CHECK_EQUAL(c2.tell(), 4); + std_buffer = std::string(reinterpret_cast(&buffer), written); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference2), 0, "Difference in content"); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } + + + + { + // C1 + c1.seek(1); // reset to first pos in file + BOOST_CHECK_EQUAL(c1.tell(), 1); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } + { + // C2 + c2.seek(1); // reset to first pos in file + BOOST_CHECK_EQUAL(c2.tell(), 1); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } + + + + { + // C1 + written = c1.read(buffer, 4); + BOOST_CHECK_EQUAL(written, 4); + BOOST_CHECK_EQUAL(c1.tell(), 5); + std_buffer = std::string(reinterpret_cast(&buffer), written); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference3), 0, "Difference in content"); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } + { + // C2 + written = c2.read(buffer, 4); + BOOST_CHECK_EQUAL(written, 4); + BOOST_CHECK_EQUAL(c2.tell(), 5); + std_buffer = std::string(reinterpret_cast(&buffer), written); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference3), 0, "Difference in content"); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } } } -BOOST_AUTO_TEST_CASE(test_chunked_reading_large_file) +BOOST_AUTO_TEST_CASE(test_chunked_reader_old__large_file) { // this file needs two buffers as its size is 1593 @@ -156,7 +380,7 @@ BOOST_AUTO_TEST_CASE(test_chunked_reading_large_file) (unsigned) ZSTD_SEEKABLE_FRAME_SIZE); - char buffer[READ_BUFFER_SIZE + 1]; + unsigned char buffer[READ_BUFFER_SIZE + 1]; flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); std::string std_buffer; size_t written; @@ -170,137 +394,276 @@ 
BOOST_AUTO_TEST_CASE(test_chunked_reading_large_file) { - chunked_reader r_flat = chunked_reader(fastafs_file.c_str()); - - written = r_flat.read(buffer, 1024); - BOOST_CHECK_EQUAL(written, 1024); - std_buffer = std::string(buffer, written); - BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference1), 0, "Difference in content 1st read"); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - - written = r_flat.read(buffer, 1024); - BOOST_CHECK_EQUAL(written, 569); - std_buffer = std::string(buffer, written); - BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference2), 0, "Difference in content 2nd read"); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - - written = r_flat.read(buffer, 1024); - BOOST_CHECK_EQUAL(written, 0); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - - written = r_flat.read(buffer, 1024); - BOOST_CHECK_EQUAL(written, 0); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - - - // set back - r_flat.seek(1024); - - written = r_flat.read(buffer, 1024); - BOOST_CHECK_EQUAL(written, 569); - std_buffer = std::string(buffer, written); - BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference2), 0, "Difference in content 2nd read"); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - - - // set back - r_flat.seek(4); - - written = r_flat.read(buffer, 1024);// reads across two buffers? - BOOST_CHECK_EQUAL(written, 1024); - std_buffer = std::string(buffer, written); - BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference3), 0, "Difference in content 2nd read"); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - - written = r_flat.read(buffer, 1024);// reads across two buffers? - BOOST_CHECK_EQUAL(written, 565); - std_buffer = std::string(buffer, written); - BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference4), 0, "Difference in content 2nd read"); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - - - r_flat.seek(4); - - written = r_flat.read(buffer, 4);// reads across two buffers? 
- BOOST_CHECK_EQUAL(written, 4); - std_buffer = std::string(buffer, written); - BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference5), 0, "Difference in content 2nd read"); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - - written = r_flat.read(buffer, 1024);// reads across two buffers? - BOOST_CHECK_EQUAL(written, 1024); - std_buffer = std::string(buffer, written); - BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference6), 0, "Difference in content 2nd read"); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + // Context equivalent - uncompressed + chunked_reader c1(fastafs_file.c_str()); + c1.fopen(0); + BOOST_CHECK(c1.typeid_state() == typeid(ContextUncompressed)); + BOOST_CHECK(c1.typeid_state() != typeid(ContextZstdSeekable)); + + // Context equivalent - compressed + chunked_reader c2(fastafs_file_zstd.c_str()); + c2.fopen(0); + BOOST_CHECK(c2.typeid_state() == typeid(ContextZstdSeekable)); + BOOST_CHECK(c2.typeid_state() != typeid(ContextUncompressed)); + + + { + // C1 + written = c1.read(buffer, 1024); + BOOST_CHECK_EQUAL(written, 1024); + std_buffer = std::string(reinterpret_cast(&buffer), written); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference1), 0, "Difference in content 1st read"); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } + { + // C2 + written = c2.read(buffer, 1024); + BOOST_CHECK_EQUAL(written, 1024); + std_buffer = std::string(reinterpret_cast(&buffer), written); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference1), 0, "Difference in content 1st read"); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } + + + + { + // C1 + written = c1.read(buffer, 1024); + BOOST_CHECK_EQUAL(written, 569); + std_buffer = std::string(reinterpret_cast(&buffer), written); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference2), 0, "Difference in content 2nd read"); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } + + { + // C2 + written = c2.read(buffer, 1024); + BOOST_CHECK_EQUAL(written, 569); + std_buffer = 
std::string(reinterpret_cast(&buffer), written); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference2), 0, "Difference in content 2nd read"); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } + + + + { + // C1 + written = c1.read(buffer, 1024); + BOOST_CHECK_EQUAL(written, 0); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } + { + // C2 + written = c2.read(buffer, 1024); + BOOST_CHECK_EQUAL(written, 0); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } + + + + { + // C1 + written = c1.read(buffer, 1024); + BOOST_CHECK_EQUAL(written, 0); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } + { + // C2 + written = c2.read(buffer, 1024); + BOOST_CHECK_EQUAL(written, 0); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } + + + // set back / seek + { + // C1 + c1.seek(1024); + } + { + // C2 + c2.seek(1024); + } + + + + { + // C1 + written = c1.read(buffer, 1024); + BOOST_CHECK_EQUAL(written, 569); + std_buffer = std::string(reinterpret_cast(&buffer), written); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference2), 0, "Difference in content 2nd read"); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } + { + // C2 + written = c2.read(buffer, 1024); + BOOST_CHECK_EQUAL(written, 569); + std_buffer = std::string(reinterpret_cast(&buffer), written); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference2), 0, "Difference in content 2nd read"); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } + + + + { + // C1 + c1.seek(4); + } + { + // C2 + c2.seek(4); + } + + + + { + // C1 + written = c1.read(buffer, 1024);// reads across two buffers? + BOOST_CHECK_EQUAL(written, 1024); + std_buffer = std::string(reinterpret_cast(&buffer), written); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference3), 0, "Difference in content 2nd read"); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } + { + // C2 + written = c2.read(buffer, 1024);// reads across two buffers? 
+ BOOST_CHECK_EQUAL(written, 1024); + std_buffer = std::string(reinterpret_cast(&buffer), written); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference3), 0, "Difference in content 2nd read"); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } + + + { + // C1 + written = c1.read(buffer, 1024);// reads across two buffers? + BOOST_CHECK_EQUAL(written, 565); + std_buffer = std::string(reinterpret_cast(&buffer), written); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference4), 0, "Difference in content 2nd read"); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } + { + // C2 + written = c2.read(buffer, 1024);// reads across two buffers? + BOOST_CHECK_EQUAL(written, 565); + std_buffer = std::string(reinterpret_cast(&buffer), written); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference4), 0, "Difference in content 2nd read"); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } + + + { + // C1 + c1.seek(4); + } + { + // C2 + c2.seek(4); + } + + + + { + // C1 + written = c1.read(buffer, 4);// reads across two buffers? + BOOST_CHECK_EQUAL(written, 4); + std_buffer = std::string(reinterpret_cast(&buffer), written); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference5), 0, "Difference in content 2nd read"); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } + { + // C2 + written = c2.read(buffer, 4);// reads across two buffers? + BOOST_CHECK_EQUAL(written, 4); + std_buffer = std::string(reinterpret_cast(&buffer), written); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference5), 0, "Difference in content 2nd read"); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } + + + + { + // C1 + written = c1.read(buffer, 1024);// reads across two buffers? 
+ BOOST_CHECK_EQUAL(written, 1024); + std_buffer = std::string(reinterpret_cast(&buffer), written); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference6), 0, "Difference in content 2nd read"); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } + { + // C2 + written = c2.read(buffer, 1024);// reads across two buffers? + BOOST_CHECK_EQUAL(written, 1024); + std_buffer = std::string(reinterpret_cast(&buffer), written); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference6), 0, "Difference in content 2nd read"); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } } +} - { - chunked_reader r_zstd = chunked_reader(fastafs_file_zstd.c_str()); - - written = r_zstd.read(buffer, 1024); - BOOST_CHECK_EQUAL(written, 1024); - std_buffer = std::string(buffer, written); - BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference1), 0, "Difference in content 1st read"); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - - written = r_zstd.read(buffer, 1024); - BOOST_CHECK_EQUAL(written, 569); - std_buffer = std::string(buffer, written); - BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference2), 0, "Difference in content 2nd read"); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - - written = r_zstd.read(buffer, 1024); - BOOST_CHECK_EQUAL(written, 0); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - - written = r_zstd.read(buffer, 1024); - BOOST_CHECK_EQUAL(written, 0); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - - - // set back - r_zstd.seek(1024); - written = r_zstd.read(buffer, 1024); - BOOST_CHECK_EQUAL(written, 569); - std_buffer = std::string(buffer, written); - BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference2), 0, "Difference in content 2nd read"); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); +BOOST_AUTO_TEST_CASE(test_chunked_reader_old__new_style) +{ + // this file needs two buffers as its size is 1593 + std::string test_name = "test_007"; + std::string fasta_file = "test/data/" + test_name + ".fa"; + std::string 
fastafs_file = "tmp/" + test_name + ".fastafs"; + std::string fastafs_file_zstd = "tmp/" + test_name + ".fastafs.zst"; - // set back - r_zstd.seek(4); + fasta_to_fastafs(fasta_file, fastafs_file, false); + ZSTD_seekable_compressFile_orDie((const char*) fastafs_file.c_str(), + (const char*) fastafs_file_zstd.c_str(), + (int) ZSTD_COMPRESSION_QUALIITY, + (unsigned) ZSTD_SEEKABLE_FRAME_SIZE); - written = r_zstd.read(buffer, 1024);// reads across two buffers? - BOOST_CHECK_EQUAL(written, 1024); - std_buffer = std::string(buffer, written); - BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference3), 0, "Difference in content 2nd read"); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - written = r_zstd.read(buffer, 1024);// reads across two buffers? - BOOST_CHECK_EQUAL(written, 565); - std_buffer = std::string(buffer, written); - BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference4), 0, "Difference in content 2nd read"); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + unsigned char buffer[READ_BUFFER_SIZE + 1]; + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + std::string std_buffer; + size_t written; + std::string reference1 = 
"\x0f\x0a\x46\x53\x00\x00\x00\x00\x80\x00\x00\x00\x06\x20\x00\x00\x17\xd7\xf4\xbd\xdd\x5d\x39\xcc\xce\x7e\xe8\x6e\x9d\x92\x70\x2d\x96\x68\x9f\xba\x83\xe1\x99\x2d\x9f\xe4\xed\x65\x3f\x09\x88\x5d\x28\x5c\xc0\x99\x36\x80\x87\xdc\x02\xc0\xe5\x5a\xef\xae\x56\x95\x59\x91\xb6\xde\x35\xf4\x1c\x60\x1e\x30\xd1\x77\x1c\x70\x2d\xda\xed\xc5\xfc\x58\x8a\x28\x94\x2b\x4f\x96\x97\x18\xa0\x65\x22\x48\xa6\x06\x1b\x65\x7f\xf4\x82\x8f\xe3\x05\xde\x00\x70\xb7\xb5\xa4\x1e\xc3\x43\xe9\x49\x92\x8b\x47\xa6\xdd\x97\xd4\x93\x4d\xb4\xd0\x76\xc7\x4d\xeb\x71\x48\x77\x43\x91\xcd\xe5\x8f\x8d\xa2\xcb\x28\x53\xcf\x82\xa4\xd5\x85\x78\xae\x37\xd9\x19\x13\x54\x52\x0c\x7d\xcb\x2a\xfd\x1b\x38\x66\xaa\xd3\x23\xe6\xf7\x20\xd5\x0a\xf1\x4b\x59\xe6\x0b\xbe\x42\xa9\x5e\x7d\xce\xec\x73\xd9\x8b\xc6\x4b\x35\xe4\x69\xbc\x10\x35\x8a\x0e\x09\x2b\xf1\x9f\x38\x15\x57\x21\x08\xe1\xa6\x6e\xf1\x8c\x52\x08\x1b\x85\x50\xe0\x1e\x01\x35\x3a\x0a\x72\x1b\xb3\xda\xfd\x78\x36\x10\xb7\x1a\x2e\x93\xd4\x63\xab\x0b\x98\xfb\x4b\x97\x47\x7f\x61\x0f\x36\x7f\xfe\x02\x36\x2e\x30\xa5\xdb\x8f\xde\xd0\xc0\xc6\x9c\x3a\x7b\x71\x24\x1e\xc3\x04\xac\x31\x7a\xf5\xf3\x33\x26\x99\xa6\x4f\x43\x6c\x46\x5c\x4d\xf5\xb8\x43\x1d\xd7\x73\x3e\xe9\xb6\x3b\xdf\xff\xf6\xf3\x2a\x34\x3f\x39\x60\x4b\xed\xde\xf4\x2f\x5d\xe7\xab\xfe\xa1\x4d\x11\x9c\xcc\x41\xf8\x3c\xdd\x18\xea\xea\x45\x3e\xa5\x0b\xb5\x7b\x38\x5e\x26\x72\xdd\x24\x51\x48\xcf\x79\xa7\xd9\x06\x2e\xe8\xfb\x5d\x3d\x4a\x81\x0d\x15\x48\xd0\x84\x15\x0f\x15\x5b\xc3\x9d\x48\xc4\x9e\x2f\x45\xd4\x1c\x24\xc4\x90\x60\xe6\xa1\x19\x6f\x2d\x3a\xf9\x52\x0d\x06\x93\x21\xb4\xc2\x43\xd7\xce\x5b\xaa\x42\x20\x35\x6c\x45\xa2\xea\xd8\xe0\xc7\x90\xe3\x4a\x3e\xb1\x65\xaf\x5c\xe3\x23\x58\x65\x88\x92\x0b\x98\xc4\x3f\x7b\xb4\x42\x6c\x77\x7f\xf3\x51\x1a\x17\x89\x1f\x03\x66\x95\xbb\x83\x3f\xfb\xd1\x8c\x46\x40\x7f\xd1\xff\x7a\xbb\xb2\xcc\xaa\xc9\xfe\xbe\x7a\xff\x5b\xf7\x17\xe0\x4d\xca\x6b\xf2\xef\x0f\x0c\x48\x90\x5c\x6d\xa4\x53\xf4\xfb\xe3\xfe\x38\x61\xdb\x32\xe6\x6e\x35\x86\xad\xad\x33\x3b\x7f\x92\x7f\xf9\x4c\xbd\x92\xbe\x41\x4f\x2
3\x37\xa2\x6e\xd9\x7d\x82\x47\xa4\x8f\x77\x51\xdb\x2f\xd6\xda\xcb\x1d\x7b\x2b\xe3\x29\x6f\x03\xad\xce\x05\xa7\xab\x34\x52\xb8\x94\xd3\x08\x5b\x9f\x0d\xec\x27\x09\xce\xb5\x82\x89\x43\xe0\xc3\xc3\x7f\xad\xeb\x30\x0a\x5c\xa8\x88\xc8\x38\x02\x18\x4d\xda\x80\x02\xf5\xb0\x0b\xbf\x3b\xbc\x11\x6b\xe7\xfd\x4b\x4a\xe9\x48\x31\x9f\x3a\x83\x80\x7b\x21\x73\xf8\x99\x43\x1b\xd6\x1a\xb6\xce\xe4\xff\x0e\x58\x33\x86\xd0\x09\x70\x14\x63\xc6\x45\x8f\x2a\x5f\xc8\xb2\x82\xdc\x4f\x99\x81\xa8\x87\xe4\xbf\xc5\xfe\x35\x81\x73\x63\x21\xf1\x82\xdb\x73\xfe\xe2\x1b\x5f\xff\x07\x8b\xb4\xef\xb6\x6a\x92\x9c\xcf\x6d\x09\xb1\xc1\x78\xa4\x56\x37\xe4\x6a\xf9\x01\x1e\x8c\x51\x14\x10\x34\xbd\xb0\x4f\xc6\xcb\xd6\xf4\xee\xed\x7c\x23\xa2\x80\xde\x5d\x76\x9d\x09\xd8\x1d\x45\x21\xc1\xad\xe9\x74\xf2\x61\xd4\x0b\xc7\x0d\x6a\xab\x25\x7c\x19\xa3\xf0\x88\x87\x7b\xba\xf0\x37\x3f\x59\x8f\x7f\x8e\x25\xbb\x80\x70\xf2\xe3\xf5\x0a\xa5\xb5\x2c\x43\x6f\xf1\x7b\xd3\x48\x86\x9a\xa2\xb1\x42\x89\xf3\x00\x0e\x9d\x99\xca\x5e\xb0\x2a\xf7\x46\xe6\xfb\xb9\x22\xc9\x14\xb9\x75\x95\x82\x87\x0d\x9a\x54\x80\xf6\xbc\x1f\xd9\xcb\x09\x0c\x4b\x5e\x38\xa1\x10\xaa\x32\xb1\xfa\xcc\xba\x37\x37\x01\x6d\x7f\xf1\x9d\x49\x35\x6a\x5b\xec\xec\xfb\x6a\x46\xca\x41\x03\x35\xfb\x56\xef\x5b\xe2\x44\xa0\x9e\xf8\x99\xde\x92\x17\x12\x98\x5e\x11\xe0\x73\x94\x23\xc9\x81\x61\xcc\x8a\xb4\x72\x5d\x6e\x1b\xfb\xa4\x3c\x79\x06\x12\xd3\x00\x47\xa7\x8e\x8c\x42\x9d\xa4\xfd\x34\xcd\xf0\x94\xdc\x3c\x84\xe3\xf7\xfc\x16\xd8\x0d\x4a\x9d\x05\xe1\xff\x1b\x47\xf1\xdc\xdf\xa4\x86\x09\xc1\xfe\xde\x45\xe4\x43\xfd\x0d\x05\xf4\x3f\xb5\x2e\xe7\x48\xde\xc8\x2b\x8a\x5f\xee\x28\x66\x09\xb4\x65\x12\x77\x23\x6a\xe2\x80\xa4\xc2\xa5\x1e\xbe\xd9\x8e\xae\x56\x4d\x56\xfe\xed\xe8\x0e\x39\xab\xba\x68\xfd\x39\x2c\x22\x30\x80\x31\xfe\x34\x46\x7d\xea\x3c\x8e\x5b\x87\xef\xac\x2d\xe3\x80\x19\x5a\xd8\xba\x63\xd5\xb4\x59\xc0\x38\xff\xc5\xd8\x00\x75\x8e\x31\x7c\x1f\x90\x98\xdc\x4a\x9c\x67\x84\x12\x87\xb2\x06\xcc\x5c\x41\xc4\xa2\x22\x88\x2d\xf5\x43\xdc\x5f\xe8\x71\xa0\x0f\xbd\xa8\x33\x6f\x83\xbf\xc0\x3
a\xfd\xa7\xf9\x8a\x93\x12\x94\x0a\x9e\x39\x68\x60\xc2\xfe\x0a\x2c\x13\xb6\x25\x5a\x85\x62\x1c\x5b"s;// xxd -p + std::string reference2 = "\x44\x2c\x05\x5b\xe6\x92\x56\x6b\x2f\xf6\x4f\xfb\xdc\x46\x9c\xe2\xbd\xac\xc0\x0d\x53\x44\x4d\x29\xd3\xe3\x61\x06\x77\xfb\x0c\x1b\xfa\x05\x17\x3b\x32\xc8\x6c\xd3\x0e\xa8\x18\xde\x64\xfb\x8a\xb8\x84\xf6\x3f\x17\xc4\x1f\xea\x8c\xea\xd5\x42\xc1\xb3\xdb\x68\x90\x8a\x24\x2f\x0c\xc5\x9b\xb6\xd6\x16\x5d\x3d\x38\xf1\xf6\x80\xf2\x56\x47\xf3\x95\x64\x7e\x50\x14\x02\x73\xa9\x0a\x04\x01\xcc\xf3\x1b\x3c\x9a\xfd\x98\x86\xdf\x54\xe6\x36\x50\xe9\xc0\x46\xd7\xae\x54\xd1\xe4\xaf\x98\xc3\xa6\xee\x44\xce\x8c\x16\xdf\x33\x87\x0b\xca\x12\x91\xac\xa4\xbe\x4e\xdb\xb2\x32\x21\x21\x16\xdb\x0c\x5f\xe3\x33\xbd\xa9\x8a\x88\xed\x3e\x65\x46\x4d\x8b\x16\xf0\x73\xe7\x76\x3d\x42\xb5\xe1\xba\x14\xe8\xd9\x99\x4f\x67\xc2\x20\x0d\x41\x07\x27\x61\x3a\x28\x49\x6f\x73\xdb\x44\xdb\xe2\x5e\x54\x4e\x1c\xe0\xd4\x66\x1e\xfe\x0c\x96\x52\xb3\x79\x00\x9d\x87\xed\xee\xc6\x82\x5e\xdc\x8f\xcd\xc8\xaa\x1c\x44\x76\x22\x14\x99\xef\x56\x73\x0e\x93\x14\x77\xa3\xa4\x52\xa7\xad\x55\x6c\xe2\x1a\x6a\x57\xd1\xb8\x4a\x8f\x3a\xa9\xcf\xab\x20\x25\xc8\xa8\x13\x30\x3c\x78\xbd\x3e\x9d\x73\x8f\xd9\x10\x9c\x15\xa8\x8a\x58\x70\x34\x38\xbb\xff\x26\x6d\x42\xcd\x2f\x8f\x7c\x20\x39\xa5\x37\x70\xf1\x1f\x65\x8a\xc5\xa3\x4f\x02\x57\x35\x17\x1b\x91\xa2\xa6\xd4\x67\x1c\x54\xde\xb4\xaf\x53\x99\x92\x23\xc1\x3d\xcc\x62\x9c\x21\xd9\xb5\xde\x5f\xd6\x1e\xa5\x4a\x45\x7e\x10\x74\xc4\x9e\x7f\x3b\xdd\xf6\x6c\xb6\xf2\xc9\xb6\xbe\x01\x45\x2e\x4a\x3b\xaf\x41\x05\x91\x38\x68\x35\x36\x0e\x1a\xc7\xc9\x52\x6d\xc1\x9c\x9e\x50\x29\x7b\x3e\xe0\x39\x67\x32\xe8\xae\xaa\xac\x0c\xbb\x18\x4c\x11\x3b\x58\xc8\x80\x88\xf1\x6d\x7a\x3d\x36\xd0\x8e\xc1\xb1\xf8\xbb\xa9\xd1\xd6\x8f\x07\x6b\x12\x1a\x5b\xf1\xea\xed\x94\x1b\xe1\x1b\xe7\x0e\x75\x3d\x4e\xcf\x5b\x91\x2e\x78\x55\xd8\x8d\x1f\x1b\x09\x60\x38\xd2\xb8\xaa\x1f\xb5\x9d\x2c\xd4\x5c\x44\x78\x1f\x88\x4c\xaf\xa6\x2c\xeb\xca\x00\x51\xbe\xc9\x2e\x60\xaf\x0d\xb4\x02\xb3\x47\x0a\x3f\x4b\xbc\xc4\x
a4\xff\xbb\xb3\x0e\x4f\xb3\xf0\x71\x3a\x84\x9a\x3d\x36\x33\x25\xeb\x2f\x76\x66\x5e\xc3\xd0\x66\xfc\xd4\x10\x3b\x78\x15\x61\x2d\xfc\xe6\x05\x7e\xda\x86\x43\x15\xb9\x78\xc2\x8b\x98\x42\x3e\x56\x42\x69\xba\xa2\xf3\x1e\xec\x00\x00\x00\x00\x21\x83\x67\xa8\x14\xed\xdc\x51\xeb\x96\x93\x98\x74\x4d\x13\x7c\x00\x00\x00\x00\x00\x00\x00\x01\x10\x00\x09\x6c\x65\x6e\x2d\x6c\x69\x6d\x69\x74\x00\x00\x00\x0e\x00\x98\x32\x91\x09"s;// xxd -p + std::string reference3 = "\x00\x00\x00\x00\x80\x00\x00\x00\x06\x20\x00\x00\x17\xd7\xf4\xbd\xdd\x5d\x39\xcc\xce\x7e\xe8\x6e\x9d\x92\x70\x2d\x96\x68\x9f\xba\x83\xe1\x99\x2d\x9f\xe4\xed\x65\x3f\x09\x88\x5d\x28\x5c\xc0\x99\x36\x80\x87\xdc\x02\xc0\xe5\x5a\xef\xae\x56\x95\x59\x91\xb6\xde\x35\xf4\x1c\x60\x1e\x30\xd1\x77\x1c\x70\x2d\xda\xed\xc5\xfc\x58\x8a\x28\x94\x2b\x4f\x96\x97\x18\xa0\x65\x22\x48\xa6\x06\x1b\x65\x7f\xf4\x82\x8f\xe3\x05\xde\x00\x70\xb7\xb5\xa4\x1e\xc3\x43\xe9\x49\x92\x8b\x47\xa6\xdd\x97\xd4\x93\x4d\xb4\xd0\x76\xc7\x4d\xeb\x71\x48\x77\x43\x91\xcd\xe5\x8f\x8d\xa2\xcb\x28\x53\xcf\x82\xa4\xd5\x85\x78\xae\x37\xd9\x19\x13\x54\x52\x0c\x7d\xcb\x2a\xfd\x1b\x38\x66\xaa\xd3\x23\xe6\xf7\x20\xd5\x0a\xf1\x4b\x59\xe6\x0b\xbe\x42\xa9\x5e\x7d\xce\xec\x73\xd9\x8b\xc6\x4b\x35\xe4\x69\xbc\x10\x35\x8a\x0e\x09\x2b\xf1\x9f\x38\x15\x57\x21\x08\xe1\xa6\x6e\xf1\x8c\x52\x08\x1b\x85\x50\xe0\x1e\x01\x35\x3a\x0a\x72\x1b\xb3\xda\xfd\x78\x36\x10\xb7\x1a\x2e\x93\xd4\x63\xab\x0b\x98\xfb\x4b\x97\x47\x7f\x61\x0f\x36\x7f\xfe\x02\x36\x2e\x30\xa5\xdb\x8f\xde\xd0\xc0\xc6\x9c\x3a\x7b\x71\x24\x1e\xc3\x04\xac\x31\x7a\xf5\xf3\x33\x26\x99\xa6\x4f\x43\x6c\x46\x5c\x4d\xf5\xb8\x43\x1d\xd7\x73\x3e\xe9\xb6\x3b\xdf\xff\xf6\xf3\x2a\x34\x3f\x39\x60\x4b\xed\xde\xf4\x2f\x5d\xe7\xab\xfe\xa1\x4d\x11\x9c\xcc\x41\xf8\x3c\xdd\x18\xea\xea\x45\x3e\xa5\x0b\xb5\x7b\x38\x5e\x26\x72\xdd\x24\x51\x48\xcf\x79\xa7\xd9\x06\x2e\xe8\xfb\x5d\x3d\x4a\x81\x0d\x15\x48\xd0\x84\x15\x0f\x15\x5b\xc3\x9d\x48\xc4\x9e\x2f\x45\xd4\x1c\x24\xc4\x90\x60\xe6\xa1\x19\x6f\x2d\x3a\xf9\x52\x0d\x06\x93\x21\xb4\xc2\x43\xd7\xce\
x5b\xaa\x42\x20\x35\x6c\x45\xa2\xea\xd8\xe0\xc7\x90\xe3\x4a\x3e\xb1\x65\xaf\x5c\xe3\x23\x58\x65\x88\x92\x0b\x98\xc4\x3f\x7b\xb4\x42\x6c\x77\x7f\xf3\x51\x1a\x17\x89\x1f\x03\x66\x95\xbb\x83\x3f\xfb\xd1\x8c\x46\x40\x7f\xd1\xff\x7a\xbb\xb2\xcc\xaa\xc9\xfe\xbe\x7a\xff\x5b\xf7\x17\xe0\x4d\xca\x6b\xf2\xef\x0f\x0c\x48\x90\x5c\x6d\xa4\x53\xf4\xfb\xe3\xfe\x38\x61\xdb\x32\xe6\x6e\x35\x86\xad\xad\x33\x3b\x7f\x92\x7f\xf9\x4c\xbd\x92\xbe\x41\x4f\x23\x37\xa2\x6e\xd9\x7d\x82\x47\xa4\x8f\x77\x51\xdb\x2f\xd6\xda\xcb\x1d\x7b\x2b\xe3\x29\x6f\x03\xad\xce\x05\xa7\xab\x34\x52\xb8\x94\xd3\x08\x5b\x9f\x0d\xec\x27\x09\xce\xb5\x82\x89\x43\xe0\xc3\xc3\x7f\xad\xeb\x30\x0a\x5c\xa8\x88\xc8\x38\x02\x18\x4d\xda\x80\x02\xf5\xb0\x0b\xbf\x3b\xbc\x11\x6b\xe7\xfd\x4b\x4a\xe9\x48\x31\x9f\x3a\x83\x80\x7b\x21\x73\xf8\x99\x43\x1b\xd6\x1a\xb6\xce\xe4\xff\x0e\x58\x33\x86\xd0\x09\x70\x14\x63\xc6\x45\x8f\x2a\x5f\xc8\xb2\x82\xdc\x4f\x99\x81\xa8\x87\xe4\xbf\xc5\xfe\x35\x81\x73\x63\x21\xf1\x82\xdb\x73\xfe\xe2\x1b\x5f\xff\x07\x8b\xb4\xef\xb6\x6a\x92\x9c\xcf\x6d\x09\xb1\xc1\x78\xa4\x56\x37\xe4\x6a\xf9\x01\x1e\x8c\x51\x14\x10\x34\xbd\xb0\x4f\xc6\xcb\xd6\xf4\xee\xed\x7c\x23\xa2\x80\xde\x5d\x76\x9d\x09\xd8\x1d\x45\x21\xc1\xad\xe9\x74\xf2\x61\xd4\x0b\xc7\x0d\x6a\xab\x25\x7c\x19\xa3\xf0\x88\x87\x7b\xba\xf0\x37\x3f\x59\x8f\x7f\x8e\x25\xbb\x80\x70\xf2\xe3\xf5\x0a\xa5\xb5\x2c\x43\x6f\xf1\x7b\xd3\x48\x86\x9a\xa2\xb1\x42\x89\xf3\x00\x0e\x9d\x99\xca\x5e\xb0\x2a\xf7\x46\xe6\xfb\xb9\x22\xc9\x14\xb9\x75\x95\x82\x87\x0d\x9a\x54\x80\xf6\xbc\x1f\xd9\xcb\x09\x0c\x4b\x5e\x38\xa1\x10\xaa\x32\xb1\xfa\xcc\xba\x37\x37\x01\x6d\x7f\xf1\x9d\x49\x35\x6a\x5b\xec\xec\xfb\x6a\x46\xca\x41\x03\x35\xfb\x56\xef\x5b\xe2\x44\xa0\x9e\xf8\x99\xde\x92\x17\x12\x98\x5e\x11\xe0\x73\x94\x23\xc9\x81\x61\xcc\x8a\xb4\x72\x5d\x6e\x1b\xfb\xa4\x3c\x79\x06\x12\xd3\x00\x47\xa7\x8e\x8c\x42\x9d\xa4\xfd\x34\xcd\xf0\x94\xdc\x3c\x84\xe3\xf7\xfc\x16\xd8\x0d\x4a\x9d\x05\xe1\xff\x1b\x47\xf1\xdc\xdf\xa4\x86\x09\xc1\xfe\xde\x45\xe4\x43\xfd\x0d\x05\xf4\x3f\xb5\x2e\xe7\x48\xde\
xc8\x2b\x8a\x5f\xee\x28\x66\x09\xb4\x65\x12\x77\x23\x6a\xe2\x80\xa4\xc2\xa5\x1e\xbe\xd9\x8e\xae\x56\x4d\x56\xfe\xed\xe8\x0e\x39\xab\xba\x68\xfd\x39\x2c\x22\x30\x80\x31\xfe\x34\x46\x7d\xea\x3c\x8e\x5b\x87\xef\xac\x2d\xe3\x80\x19\x5a\xd8\xba\x63\xd5\xb4\x59\xc0\x38\xff\xc5\xd8\x00\x75\x8e\x31\x7c\x1f\x90\x98\xdc\x4a\x9c\x67\x84\x12\x87\xb2\x06\xcc\x5c\x41\xc4\xa2\x22\x88\x2d\xf5\x43\xdc\x5f\xe8\x71\xa0\x0f\xbd\xa8\x33\x6f\x83\xbf\xc0\x3a\xfd\xa7\xf9\x8a\x93\x12\x94\x0a\x9e\x39\x68\x60\xc2\xfe\x0a\x2c\x13\xb6\x25\x5a\x85\x62\x1c\x5b\x44\x2c\x05\x5b"s;// xxd -p + std::string reference4 = "\xe6\x92\x56\x6b\x2f\xf6\x4f\xfb\xdc\x46\x9c\xe2\xbd\xac\xc0\x0d\x53\x44\x4d\x29\xd3\xe3\x61\x06\x77\xfb\x0c\x1b\xfa\x05\x17\x3b\x32\xc8\x6c\xd3\x0e\xa8\x18\xde\x64\xfb\x8a\xb8\x84\xf6\x3f\x17\xc4\x1f\xea\x8c\xea\xd5\x42\xc1\xb3\xdb\x68\x90\x8a\x24\x2f\x0c\xc5\x9b\xb6\xd6\x16\x5d\x3d\x38\xf1\xf6\x80\xf2\x56\x47\xf3\x95\x64\x7e\x50\x14\x02\x73\xa9\x0a\x04\x01\xcc\xf3\x1b\x3c\x9a\xfd\x98\x86\xdf\x54\xe6\x36\x50\xe9\xc0\x46\xd7\xae\x54\xd1\xe4\xaf\x98\xc3\xa6\xee\x44\xce\x8c\x16\xdf\x33\x87\x0b\xca\x12\x91\xac\xa4\xbe\x4e\xdb\xb2\x32\x21\x21\x16\xdb\x0c\x5f\xe3\x33\xbd\xa9\x8a\x88\xed\x3e\x65\x46\x4d\x8b\x16\xf0\x73\xe7\x76\x3d\x42\xb5\xe1\xba\x14\xe8\xd9\x99\x4f\x67\xc2\x20\x0d\x41\x07\x27\x61\x3a\x28\x49\x6f\x73\xdb\x44\xdb\xe2\x5e\x54\x4e\x1c\xe0\xd4\x66\x1e\xfe\x0c\x96\x52\xb3\x79\x00\x9d\x87\xed\xee\xc6\x82\x5e\xdc\x8f\xcd\xc8\xaa\x1c\x44\x76\x22\x14\x99\xef\x56\x73\x0e\x93\x14\x77\xa3\xa4\x52\xa7\xad\x55\x6c\xe2\x1a\x6a\x57\xd1\xb8\x4a\x8f\x3a\xa9\xcf\xab\x20\x25\xc8\xa8\x13\x30\x3c\x78\xbd\x3e\x9d\x73\x8f\xd9\x10\x9c\x15\xa8\x8a\x58\x70\x34\x38\xbb\xff\x26\x6d\x42\xcd\x2f\x8f\x7c\x20\x39\xa5\x37\x70\xf1\x1f\x65\x8a\xc5\xa3\x4f\x02\x57\x35\x17\x1b\x91\xa2\xa6\xd4\x67\x1c\x54\xde\xb4\xaf\x53\x99\x92\x23\xc1\x3d\xcc\x62\x9c\x21\xd9\xb5\xde\x5f\xd6\x1e\xa5\x4a\x45\x7e\x10\x74\xc4\x9e\x7f\x3b\xdd\xf6\x6c\xb6\xf2\xc9\xb6\xbe\x01\x45\x2e\x4a\x3b\xaf\x41\x05\x91\x38\x68\x35\x36\x0e\x1a\xc7
\xc9\x52\x6d\xc1\x9c\x9e\x50\x29\x7b\x3e\xe0\x39\x67\x32\xe8\xae\xaa\xac\x0c\xbb\x18\x4c\x11\x3b\x58\xc8\x80\x88\xf1\x6d\x7a\x3d\x36\xd0\x8e\xc1\xb1\xf8\xbb\xa9\xd1\xd6\x8f\x07\x6b\x12\x1a\x5b\xf1\xea\xed\x94\x1b\xe1\x1b\xe7\x0e\x75\x3d\x4e\xcf\x5b\x91\x2e\x78\x55\xd8\x8d\x1f\x1b\x09\x60\x38\xd2\xb8\xaa\x1f\xb5\x9d\x2c\xd4\x5c\x44\x78\x1f\x88\x4c\xaf\xa6\x2c\xeb\xca\x00\x51\xbe\xc9\x2e\x60\xaf\x0d\xb4\x02\xb3\x47\x0a\x3f\x4b\xbc\xc4\xa4\xff\xbb\xb3\x0e\x4f\xb3\xf0\x71\x3a\x84\x9a\x3d\x36\x33\x25\xeb\x2f\x76\x66\x5e\xc3\xd0\x66\xfc\xd4\x10\x3b\x78\x15\x61\x2d\xfc\xe6\x05\x7e\xda\x86\x43\x15\xb9\x78\xc2\x8b\x98\x42\x3e\x56\x42\x69\xba\xa2\xf3\x1e\xec\x00\x00\x00\x00\x21\x83\x67\xa8\x14\xed\xdc\x51\xeb\x96\x93\x98\x74\x4d\x13\x7c\x00\x00\x00\x00\x00\x00\x00\x01\x10\x00\x09\x6c\x65\x6e\x2d\x6c\x69\x6d\x69\x74\x00\x00\x00\x0e\x00\x98\x32\x91\x09"s;// xxd -p + std::string reference5 = "\x00\x00\x00\x00"s;// xxd -p + std::string reference6 = "\x80\x00\x00\x00\x06\x20\x00\x00\x17\xd7\xf4\xbd\xdd\x5d\x39\xcc\xce\x7e\xe8\x6e\x9d\x92\x70\x2d\x96\x68\x9f\xba\x83\xe1\x99\x2d\x9f\xe4\xed\x65\x3f\x09\x88\x5d\x28\x5c\xc0\x99\x36\x80\x87\xdc\x02\xc0\xe5\x5a\xef\xae\x56\x95\x59\x91\xb6\xde\x35\xf4\x1c\x60\x1e\x30\xd1\x77\x1c\x70\x2d\xda\xed\xc5\xfc\x58\x8a\x28\x94\x2b\x4f\x96\x97\x18\xa0\x65\x22\x48\xa6\x06\x1b\x65\x7f\xf4\x82\x8f\xe3\x05\xde\x00\x70\xb7\xb5\xa4\x1e\xc3\x43\xe9\x49\x92\x8b\x47\xa6\xdd\x97\xd4\x93\x4d\xb4\xd0\x76\xc7\x4d\xeb\x71\x48\x77\x43\x91\xcd\xe5\x8f\x8d\xa2\xcb\x28\x53\xcf\x82\xa4\xd5\x85\x78\xae\x37\xd9\x19\x13\x54\x52\x0c\x7d\xcb\x2a\xfd\x1b\x38\x66\xaa\xd3\x23\xe6\xf7\x20\xd5\x0a\xf1\x4b\x59\xe6\x0b\xbe\x42\xa9\x5e\x7d\xce\xec\x73\xd9\x8b\xc6\x4b\x35\xe4\x69\xbc\x10\x35\x8a\x0e\x09\x2b\xf1\x9f\x38\x15\x57\x21\x08\xe1\xa6\x6e\xf1\x8c\x52\x08\x1b\x85\x50\xe0\x1e\x01\x35\x3a\x0a\x72\x1b\xb3\xda\xfd\x78\x36\x10\xb7\x1a\x2e\x93\xd4\x63\xab\x0b\x98\xfb\x4b\x97\x47\x7f\x61\x0f\x36\x7f\xfe\x02\x36\x2e\x30\xa5\xdb\x8f\xde\xd0\xc0\xc6\x9c\x3a\x7b\x71\x24\x1e\xc3\x04\x
ac\x31\x7a\xf5\xf3\x33\x26\x99\xa6\x4f\x43\x6c\x46\x5c\x4d\xf5\xb8\x43\x1d\xd7\x73\x3e\xe9\xb6\x3b\xdf\xff\xf6\xf3\x2a\x34\x3f\x39\x60\x4b\xed\xde\xf4\x2f\x5d\xe7\xab\xfe\xa1\x4d\x11\x9c\xcc\x41\xf8\x3c\xdd\x18\xea\xea\x45\x3e\xa5\x0b\xb5\x7b\x38\x5e\x26\x72\xdd\x24\x51\x48\xcf\x79\xa7\xd9\x06\x2e\xe8\xfb\x5d\x3d\x4a\x81\x0d\x15\x48\xd0\x84\x15\x0f\x15\x5b\xc3\x9d\x48\xc4\x9e\x2f\x45\xd4\x1c\x24\xc4\x90\x60\xe6\xa1\x19\x6f\x2d\x3a\xf9\x52\x0d\x06\x93\x21\xb4\xc2\x43\xd7\xce\x5b\xaa\x42\x20\x35\x6c\x45\xa2\xea\xd8\xe0\xc7\x90\xe3\x4a\x3e\xb1\x65\xaf\x5c\xe3\x23\x58\x65\x88\x92\x0b\x98\xc4\x3f\x7b\xb4\x42\x6c\x77\x7f\xf3\x51\x1a\x17\x89\x1f\x03\x66\x95\xbb\x83\x3f\xfb\xd1\x8c\x46\x40\x7f\xd1\xff\x7a\xbb\xb2\xcc\xaa\xc9\xfe\xbe\x7a\xff\x5b\xf7\x17\xe0\x4d\xca\x6b\xf2\xef\x0f\x0c\x48\x90\x5c\x6d\xa4\x53\xf4\xfb\xe3\xfe\x38\x61\xdb\x32\xe6\x6e\x35\x86\xad\xad\x33\x3b\x7f\x92\x7f\xf9\x4c\xbd\x92\xbe\x41\x4f\x23\x37\xa2\x6e\xd9\x7d\x82\x47\xa4\x8f\x77\x51\xdb\x2f\xd6\xda\xcb\x1d\x7b\x2b\xe3\x29\x6f\x03\xad\xce\x05\xa7\xab\x34\x52\xb8\x94\xd3\x08\x5b\x9f\x0d\xec\x27\x09\xce\xb5\x82\x89\x43\xe0\xc3\xc3\x7f\xad\xeb\x30\x0a\x5c\xa8\x88\xc8\x38\x02\x18\x4d\xda\x80\x02\xf5\xb0\x0b\xbf\x3b\xbc\x11\x6b\xe7\xfd\x4b\x4a\xe9\x48\x31\x9f\x3a\x83\x80\x7b\x21\x73\xf8\x99\x43\x1b\xd6\x1a\xb6\xce\xe4\xff\x0e\x58\x33\x86\xd0\x09\x70\x14\x63\xc6\x45\x8f\x2a\x5f\xc8\xb2\x82\xdc\x4f\x99\x81\xa8\x87\xe4\xbf\xc5\xfe\x35\x81\x73\x63\x21\xf1\x82\xdb\x73\xfe\xe2\x1b\x5f\xff\x07\x8b\xb4\xef\xb6\x6a\x92\x9c\xcf\x6d\x09\xb1\xc1\x78\xa4\x56\x37\xe4\x6a\xf9\x01\x1e\x8c\x51\x14\x10\x34\xbd\xb0\x4f\xc6\xcb\xd6\xf4\xee\xed\x7c\x23\xa2\x80\xde\x5d\x76\x9d\x09\xd8\x1d\x45\x21\xc1\xad\xe9\x74\xf2\x61\xd4\x0b\xc7\x0d\x6a\xab\x25\x7c\x19\xa3\xf0\x88\x87\x7b\xba\xf0\x37\x3f\x59\x8f\x7f\x8e\x25\xbb\x80\x70\xf2\xe3\xf5\x0a\xa5\xb5\x2c\x43\x6f\xf1\x7b\xd3\x48\x86\x9a\xa2\xb1\x42\x89\xf3\x00\x0e\x9d\x99\xca\x5e\xb0\x2a\xf7\x46\xe6\xfb\xb9\x22\xc9\x14\xb9\x75\x95\x82\x87\x0d\x9a\x54\x80\xf6\xbc\x1f\xd9\xcb\x09\x0c\x
4b\x5e\x38\xa1\x10\xaa\x32\xb1\xfa\xcc\xba\x37\x37\x01\x6d\x7f\xf1\x9d\x49\x35\x6a\x5b\xec\xec\xfb\x6a\x46\xca\x41\x03\x35\xfb\x56\xef\x5b\xe2\x44\xa0\x9e\xf8\x99\xde\x92\x17\x12\x98\x5e\x11\xe0\x73\x94\x23\xc9\x81\x61\xcc\x8a\xb4\x72\x5d\x6e\x1b\xfb\xa4\x3c\x79\x06\x12\xd3\x00\x47\xa7\x8e\x8c\x42\x9d\xa4\xfd\x34\xcd\xf0\x94\xdc\x3c\x84\xe3\xf7\xfc\x16\xd8\x0d\x4a\x9d\x05\xe1\xff\x1b\x47\xf1\xdc\xdf\xa4\x86\x09\xc1\xfe\xde\x45\xe4\x43\xfd\x0d\x05\xf4\x3f\xb5\x2e\xe7\x48\xde\xc8\x2b\x8a\x5f\xee\x28\x66\x09\xb4\x65\x12\x77\x23\x6a\xe2\x80\xa4\xc2\xa5\x1e\xbe\xd9\x8e\xae\x56\x4d\x56\xfe\xed\xe8\x0e\x39\xab\xba\x68\xfd\x39\x2c\x22\x30\x80\x31\xfe\x34\x46\x7d\xea\x3c\x8e\x5b\x87\xef\xac\x2d\xe3\x80\x19\x5a\xd8\xba\x63\xd5\xb4\x59\xc0\x38\xff\xc5\xd8\x00\x75\x8e\x31\x7c\x1f\x90\x98\xdc\x4a\x9c\x67\x84\x12\x87\xb2\x06\xcc\x5c\x41\xc4\xa2\x22\x88\x2d\xf5\x43\xdc\x5f\xe8\x71\xa0\x0f\xbd\xa8\x33\x6f\x83\xbf\xc0\x3a\xfd\xa7\xf9\x8a\x93\x12\x94\x0a\x9e\x39\x68\x60\xc2\xfe\x0a\x2c\x13\xb6\x25\x5a\x85\x62\x1c\x5b\x44\x2c\x05\x5b\xe6\x92\x56\x6b"s;// xxd -p - r_zstd.seek(4); - written = r_zstd.read(buffer, 4);// reads across two buffers? 
- BOOST_CHECK_EQUAL(written, 4); - std_buffer = std::string(buffer, written); - BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference5), 0, "Difference in content 2nd read"); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + { + chunked_reader c1 = chunked_reader(fastafs_file.c_str()); + c1.fopen(0); // open file handle and load buffer + + chunked_reader c2 = chunked_reader(fastafs_file_zstd.c_str()); + c2.fopen(0); // open file handle and load buffer + + { + // C1 + written = c1.read(buffer, 1000); + buffer[written] = '\0'; + BOOST_CHECK_EQUAL(written, 1000); + + written = c1.read(buffer, 1000); + buffer[written] = '\0'; + BOOST_CHECK_EQUAL(written, 593); + } + + { + // C2 + written = c2.read(buffer, 1000); + buffer[written] = '\0'; + BOOST_CHECK_EQUAL(written, 1000); + + written = c2.read(buffer, 1000); + buffer[written] = '\0'; + BOOST_CHECK_EQUAL(written, 593); + } - written = r_zstd.read(buffer, 1024);// reads across two buffers? - BOOST_CHECK_EQUAL(written, 1024); - std_buffer = std::string(buffer, written); - BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference6), 0, "Difference in content 2nd read"); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); } } - BOOST_AUTO_TEST_SUITE_END() diff --git a/test/database/test_database.cpp b/test/database/test_database.cpp new file mode 100644 index 00000000..c3dc2918 --- /dev/null +++ b/test/database/test_database.cpp @@ -0,0 +1,39 @@ +#define BOOST_TEST_MODULE database + +#include + +#include "config.hpp" + +#include "database.hpp" + + +//#include +//#include + + +BOOST_AUTO_TEST_SUITE(Testing) + + +BOOST_AUTO_TEST_CASE(test_database__01) +{ + const std::string default_dir_1 = database::get_default_dir(); + unsetenv("HOME"); + const std::string default_dir_2 = database::get_default_dir(); + + BOOST_REQUIRE(default_dir_1.size() > 0); + BOOST_REQUIRE(default_dir_2.size() > 0); + + BOOST_CHECK_EQUAL(default_dir_1, default_dir_2); + // printf("[%s]==[%s]\n", default_dir_1.c_str(), default_dir_2.c_str()); +} + + 
+BOOST_AUTO_TEST_CASE(test_database__02) +{ + database d("tmp/database"); +} + + + + +BOOST_AUTO_TEST_SUITE_END() diff --git a/test/fastafs/test_fastafs.cpp b/test/fastafs/test_fastafs.cpp index 664a8916..53552e13 100644 --- a/test/fastafs/test_fastafs.cpp +++ b/test/fastafs/test_fastafs.cpp @@ -52,6 +52,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs_seq_fastafile_size) // > c h r 1 \n t t t t c c c c a a a a g g g \n g \n BOOST_CHECK_EQUAL(fs.data[0]->fasta_filesize(15), 24); chunked_reader file = chunked_reader(fs.filename.c_str()); + file.fopen(0); ffs2f_init* cache_p40 = fs.init_ffs2f(40, true); ffs2f_init* cache_p23 = fs.init_ffs2f(23, true); @@ -102,6 +103,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs_seq_fastafile_size_padding_0) // > c h r 1 \n T T T T C C C C A A A A G G G G \n BOOST_CHECK_EQUAL(fs.data[0]->fasta_filesize(fs.data[0]->n), 23); chunked_reader file = chunked_reader(fs.filename.c_str()); + file.fopen(0); ffs2f_init* cache_p0 = fs.init_ffs2f(0, true); // then: check returncodes: @@ -116,6 +118,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs_seq_fastafile_size_padding_0) BOOST_CHECK_EQUAL(ret, 1); } + // check if out of bound query returns 0 ret = fs.data[0]->view_fasta_chunk(cache_p0->sequences[0], chunk, 1, ref.size(), file); BOOST_CHECK_EQUAL(ret, 0); @@ -140,6 +143,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs_seq_fastafile_size_padding_0__no_masking) // > c h r 1 \n T T T T C C C C A A A A G G G G \n BOOST_CHECK_EQUAL(fs.data[0]->fasta_filesize(fs.data[0]->n), 23); chunked_reader file = chunked_reader(fs.filename.c_str()); + file.fopen(0); ffs2f_init* cache_p0 = fs.init_ffs2f(0, false); // no masking; everything must be uppercase @@ -174,6 +178,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs_seq_sha1) BOOST_REQUIRE(fs.data.size() > 0); chunked_reader file = chunked_reader(fs.filename.c_str()); + file.fopen(0); //fs.data[0]->sha1(cache_p0->sequences[0], &file); BOOST_CHECK_EQUAL(fs.data[0]->sha1(cache_p0->sequences[0], file), "2c0cae1d4e272b3ba63e7dd7e3c0efe62f2aaa2f"); @@ -194,6 
+199,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs_seq_md5) BOOST_REQUIRE(fs.data.size() > 0); chunked_reader file = chunked_reader(fs.filename.c_str()); + file.fopen(0); BOOST_CHECK_EQUAL(fs.data[0]->md5(cache->sequences[0], file), "75255c6d90778999ad3643a2e69d4344"); BOOST_CHECK_EQUAL(fs.data[1]->md5(cache->sequences[1], file), "8b5673724a9965c29a1d76fe7031ac8a"); @@ -391,7 +397,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs__sequence_virtualization) buffer = new char[READ_BUFFER_SIZE + 1]; flush_buffer(buffer, READ_BUFFER_SIZE, '\0'); - written = fs.view_sequence_region_size(cache_p0, (strchr(arg, '/') + 5)); + written = fs.view_sequence_region_size((strchr(arg, '/') + 5)); BOOST_CHECK_EQUAL(written, 1); written = fs.view_sequence_region(cache_p0, (strchr(arg, '/') + 5), buffer, READ_BUFFER_SIZE, 0); @@ -408,7 +414,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs__sequence_virtualization) buffer = new char[READ_BUFFER_SIZE + 1]; flush_buffer(buffer, READ_BUFFER_SIZE, '\0'); - written = fs.view_sequence_region_size(cache_p0, (strchr(arg, '/') + 5)); + written = fs.view_sequence_region_size((strchr(arg, '/') + 5)); BOOST_CHECK_EQUAL(written, 1); written = fs.view_sequence_region(cache_p0, (strchr(arg, '/') + 5), buffer, READ_BUFFER_SIZE, 0); @@ -427,7 +433,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs__sequence_virtualization) buffer = new char[READ_BUFFER_SIZE + 1]; flush_buffer(buffer, READ_BUFFER_SIZE, '\0'); - written = fs.view_sequence_region_size(cache_p0, (strchr(arg, '/') + 5)); + written = fs.view_sequence_region_size((strchr(arg, '/') + 5)); BOOST_CHECK_EQUAL(written, 1); written = fs.view_sequence_region(cache_p0, (strchr(arg, '/') + 5), buffer, READ_BUFFER_SIZE, 0); @@ -446,7 +452,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs__sequence_virtualization) buffer = new char[READ_BUFFER_SIZE + 1]; flush_buffer(buffer, READ_BUFFER_SIZE, '\0'); - written = fs.view_sequence_region_size(cache_p0, (strchr(arg, '/') + 5)); + written = fs.view_sequence_region_size((strchr(arg, '/') + 5)); 
BOOST_CHECK_EQUAL(written, 1); written = fs.view_sequence_region(cache_p0, (strchr(arg, '/') + 5), buffer, READ_BUFFER_SIZE, 0); @@ -465,7 +471,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs__sequence_virtualization) buffer = new char[READ_BUFFER_SIZE + 1]; flush_buffer(buffer, READ_BUFFER_SIZE, '\0'); - written = fs.view_sequence_region_size(cache_p0, (strchr(arg, '/') + 5)); + written = fs.view_sequence_region_size((strchr(arg, '/') + 5)); BOOST_CHECK_EQUAL(written, 0); written = fs.view_sequence_region(cache_p0, (strchr(arg, '/') + 5), buffer, READ_BUFFER_SIZE, 0); @@ -486,7 +492,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs__sequence_virtualization) buffer = new char[READ_BUFFER_SIZE + 1]; flush_buffer(buffer, READ_BUFFER_SIZE, '\0'); - written = fs.view_sequence_region_size(cache_p0, (strchr(arg, '/') + 5)); + written = fs.view_sequence_region_size((strchr(arg, '/') + 5)); BOOST_CHECK_EQUAL(written, 1); written = fs.view_sequence_region(cache_p0, (strchr(arg, '/') + 5), buffer, READ_BUFFER_SIZE, 0); @@ -506,7 +512,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs__sequence_virtualization) buffer = new char[READ_BUFFER_SIZE + 1]; flush_buffer(buffer, READ_BUFFER_SIZE, '\0'); - written = fs.view_sequence_region_size(cache_p0, (strchr(arg, '/') + 5)); + written = fs.view_sequence_region_size((strchr(arg, '/') + 5)); BOOST_CHECK_EQUAL(written, 1); written = fs.view_sequence_region(cache_p0, (strchr(arg, '/') + 5), buffer, READ_BUFFER_SIZE, 0); @@ -525,7 +531,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs__sequence_virtualization) buffer = new char[READ_BUFFER_SIZE + 1]; flush_buffer(buffer, READ_BUFFER_SIZE, '\0'); - written = fs.view_sequence_region_size(cache_p0, (strchr(arg, '/') + 5)); + written = fs.view_sequence_region_size((strchr(arg, '/') + 5)); BOOST_CHECK_EQUAL(written, 1); written = fs.view_sequence_region(cache_p0, (strchr(arg, '/') + 5), buffer, READ_BUFFER_SIZE, 0); @@ -545,7 +551,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs__sequence_virtualization) buffer = new char[READ_BUFFER_SIZE + 
1]; flush_buffer(buffer, READ_BUFFER_SIZE, '\0'); - written = fs.view_sequence_region_size(cache_p0, (strchr(arg, '/') + 5)); + written = fs.view_sequence_region_size((strchr(arg, '/') + 5)); BOOST_CHECK_EQUAL(written, 1); written = fs.view_sequence_region(cache_p0, (strchr(arg, '/') + 5), buffer, READ_BUFFER_SIZE, 0); @@ -564,7 +570,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs__sequence_virtualization) buffer = new char[READ_BUFFER_SIZE + 1]; flush_buffer(buffer, READ_BUFFER_SIZE, '\0'); - written = fs.view_sequence_region_size(cache_p0, (strchr(arg, '/') + 5)); + written = fs.view_sequence_region_size((strchr(arg, '/') + 5)); BOOST_CHECK_EQUAL(written, 1); written = fs.view_sequence_region(cache_p0, (strchr(arg, '/') + 5), buffer, READ_BUFFER_SIZE, 0); @@ -583,7 +589,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs__sequence_virtualization) buffer = new char[READ_BUFFER_SIZE + 1]; flush_buffer(buffer, READ_BUFFER_SIZE, '\0'); - written = fs.view_sequence_region_size(cache_p0, (strchr(arg, '/') + 5)); + written = fs.view_sequence_region_size((strchr(arg, '/') + 5)); BOOST_CHECK_EQUAL(written, 1); written = fs.view_sequence_region(cache_p0, (strchr(arg, '/') + 5), buffer, READ_BUFFER_SIZE, 0); @@ -602,7 +608,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs__sequence_virtualization) buffer = new char[READ_BUFFER_SIZE + 1]; flush_buffer(buffer, READ_BUFFER_SIZE, '\0'); - written = fs.view_sequence_region_size(cache_p0, (strchr(arg, '/') + 5)); + written = fs.view_sequence_region_size((strchr(arg, '/') + 5)); BOOST_CHECK_EQUAL(written, 1); written = fs.view_sequence_region(cache_p0, (strchr(arg, '/') + 5), buffer, READ_BUFFER_SIZE, 0); @@ -622,7 +628,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs__sequence_virtualization) buffer = new char[READ_BUFFER_SIZE + 1]; flush_buffer(buffer, READ_BUFFER_SIZE, '\0'); - written = fs.view_sequence_region_size(cache_p0, (strchr(arg, '/') + 5)); + written = fs.view_sequence_region_size((strchr(arg, '/') + 5)); BOOST_CHECK_EQUAL(written, 1); @@ -643,7 +649,7 @@ 
BOOST_AUTO_TEST_CASE(test_fastafs__sequence_virtualization) buffer = new char[READ_BUFFER_SIZE + 1]; flush_buffer(buffer, READ_BUFFER_SIZE, '\0'); - written = fs.view_sequence_region_size(cache_p0, (strchr(arg, '/') + 5)); + written = fs.view_sequence_region_size((strchr(arg, '/') + 5)); BOOST_CHECK_EQUAL(written, 1); written = fs.view_sequence_region(cache_p0, (strchr(arg, '/') + 5), buffer, READ_BUFFER_SIZE, 0); @@ -663,7 +669,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs__sequence_virtualization) buffer = new char[READ_BUFFER_SIZE + 1]; flush_buffer(buffer, READ_BUFFER_SIZE, '\0'); - written = fs.view_sequence_region_size(cache_p0, (strchr(arg, '/') + 5)); + written = fs.view_sequence_region_size((strchr(arg, '/') + 5)); BOOST_CHECK_EQUAL(written, 2); written = fs.view_sequence_region(cache_p0, (strchr(arg, '/') + 5), buffer, READ_BUFFER_SIZE, 0); @@ -684,7 +690,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs__sequence_virtualization) buffer = new char[READ_BUFFER_SIZE + 1]; flush_buffer(buffer, READ_BUFFER_SIZE, '\0'); - written = fs.view_sequence_region_size(cache_p0, (strchr(arg, '/') + 5)); + written = fs.view_sequence_region_size((strchr(arg, '/') + 5)); BOOST_CHECK_EQUAL(written, 2); written = fs.view_sequence_region(cache_p0, (strchr(arg, '/') + 5), buffer, READ_BUFFER_SIZE, 0); @@ -705,7 +711,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs__sequence_virtualization) buffer = new char[READ_BUFFER_SIZE + 1]; flush_buffer(buffer, READ_BUFFER_SIZE, '\0'); - written = fs.view_sequence_region_size(cache_p0, (strchr(arg, '/') + 5)); + written = fs.view_sequence_region_size((strchr(arg, '/') + 5)); BOOST_CHECK_EQUAL(written, 0); written = fs.view_sequence_region(cache_p0, (strchr(arg, '/') + 5), buffer, READ_BUFFER_SIZE, 0); @@ -725,7 +731,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs__sequence_virtualization) buffer = new char[READ_BUFFER_SIZE + 1]; flush_buffer(buffer, READ_BUFFER_SIZE, '\0'); - written = fs.view_sequence_region_size(cache_p0, (strchr(arg, '/') + 5)); + written = 
fs.view_sequence_region_size((strchr(arg, '/') + 5)); BOOST_CHECK_EQUAL(written, 8); written = fs.view_sequence_region(cache_p0, (strchr(arg, '/') + 5), buffer, READ_BUFFER_SIZE, 0); @@ -753,7 +759,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs__sequence_virtualization) buffer = new char[READ_BUFFER_SIZE + 1]; flush_buffer(buffer, READ_BUFFER_SIZE, '\0'); - written = fs.view_sequence_region_size(cache_p0, (strchr(arg, '/') + 5)); + written = fs.view_sequence_region_size((strchr(arg, '/') + 5)); BOOST_CHECK_EQUAL(written, 4); written = fs.view_sequence_region(cache_p0, (strchr(arg, '/') + 5), buffer, READ_BUFFER_SIZE, 0); @@ -777,7 +783,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs__sequence_virtualization) buffer = new char[READ_BUFFER_SIZE + 1]; flush_buffer(buffer, READ_BUFFER_SIZE, '\0'); - written = fs.view_sequence_region_size(cache_p0, (strchr(arg, '/') + 5)); + written = fs.view_sequence_region_size((strchr(arg, '/') + 5)); BOOST_CHECK_EQUAL(written, 2); written = fs.view_sequence_region(cache_p0, (strchr(arg, '/') + 5), buffer, READ_BUFFER_SIZE, 0); @@ -801,7 +807,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs__sequence_virtualization) buffer = new char[READ_BUFFER_SIZE + 1]; flush_buffer(buffer, READ_BUFFER_SIZE, '\0'); - written = fs.view_sequence_region_size(cache_p0, (strchr(arg, '/') + 5)); + written = fs.view_sequence_region_size((strchr(arg, '/') + 5)); BOOST_CHECK_EQUAL(written, 2); @@ -825,7 +831,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs__sequence_virtualization) buffer = new char[READ_BUFFER_SIZE + 1]; flush_buffer(buffer, READ_BUFFER_SIZE, '\0'); - written = fs.view_sequence_region_size(cache_p0, (strchr(arg, '/') + 5)); + written = fs.view_sequence_region_size((strchr(arg, '/') + 5)); BOOST_CHECK_EQUAL(written, 15); @@ -836,7 +842,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs__sequence_virtualization) BOOST_CHECK_EQUAL(buffer[2], 'T'); BOOST_CHECK_EQUAL(buffer[3], 'G'); - written = fs.view_sequence_region_size(cache_p0, (strchr(arg, '/') + 5)); + written = 
fs.view_sequence_region_size((strchr(arg, '/') + 5)); BOOST_CHECK_EQUAL(written, 15); written = fs.view_sequence_region(cache_p0, (strchr(arg, '/') + 5), buffer, 4, 4); // small buffer size @@ -846,7 +852,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs__sequence_virtualization) BOOST_CHECK_EQUAL(buffer[2], 'T'); BOOST_CHECK_EQUAL(buffer[3], 'G'); - written = fs.view_sequence_region_size(cache_p0, (strchr(arg, '/') + 5)); + written = fs.view_sequence_region_size((strchr(arg, '/') + 5)); BOOST_CHECK_EQUAL(written, 15); @@ -857,7 +863,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs__sequence_virtualization) BOOST_CHECK_EQUAL(buffer[2], 'a'); BOOST_CHECK_EQUAL(buffer[3], 'a'); - written = fs.view_sequence_region_size(cache_p0, (strchr(arg, '/') + 5)); + written = fs.view_sequence_region_size((strchr(arg, '/') + 5)); BOOST_CHECK_EQUAL(written, 15); written = fs.view_sequence_region(cache_p0, (strchr(arg, '/') + 5), buffer, 4, 12); // small buffer size @@ -866,7 +872,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs__sequence_virtualization) BOOST_CHECK_EQUAL(buffer[1], 'c'); BOOST_CHECK_EQUAL(buffer[2], 'c'); - written = fs.view_sequence_region_size(cache_p0, (strchr(arg, '/') + 5)); + written = fs.view_sequence_region_size((strchr(arg, '/') + 5)); BOOST_CHECK_EQUAL(written, 15); delete[] buffer; @@ -883,7 +889,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs__sequence_virtualization) buffer = new char[READ_BUFFER_SIZE + 1]; flush_buffer(buffer, READ_BUFFER_SIZE, '\0'); - written = fs.view_sequence_region_size(cache_p0, (strchr(arg, '/') + 5)); + written = fs.view_sequence_region_size((strchr(arg, '/') + 5)); BOOST_CHECK_EQUAL(written, 4); @@ -907,7 +913,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs__sequence_virtualization) flush_buffer(buffer, READ_BUFFER_SIZE, '\0'); - written = fs.view_sequence_region_size(cache_p0, (strchr(arg, '/') + 5)); + written = fs.view_sequence_region_size((strchr(arg, '/') + 5)); BOOST_CHECK_EQUAL(written, 0); written = fs.view_sequence_region(cache_p0, (strchr(arg, '/') + 5), buffer, 
READ_BUFFER_SIZE, 0); // small buffer size @@ -930,64 +936,64 @@ BOOST_AUTO_TEST_CASE(test_fastafs__failing_example) { // s=4096, off=20480 -/* + /* - // is auto-generated by python script - fastafs fs = fastafs("test"); - fs.load("tmp/benchmark/test.zst"); + // is auto-generated by python script + fastafs fs = fastafs("test"); + fs.load("tmp/benchmark/test.zst"); - BOOST_REQUIRE(fs.data.size() > 0); + BOOST_REQUIRE(fs.data.size() > 0); - ffs2f_init* cache_p40 = fs.init_ffs2f(40, true); // equals original fasta + ffs2f_init* cache_p40 = fs.init_ffs2f(40, true); // equals original fasta - const int READ_BUFFER_SIZE_F = 4096 ; // make sure it is large enough, error occurrsed with buf len=4096 - char* buffer = new char[READ_BUFFER_SIZE_F + 2]; - uint32_t ret; + const int READ_BUFFER_SIZE_F = 4096 ; // make sure it is large enough, error occurrsed with buf len=4096 + char* buffer = new char[READ_BUFFER_SIZE_F + 2]; + uint32_t ret; - // test the first read - chunked_reader fh1 = chunked_reader(fs.filename.c_str()); - flush_buffer(buffer, READ_BUFFER_SIZE_F + 1, '\0'); - ret = fs.view_fasta_chunk(cache_p40, buffer, 4096, 0, fh1); - //printf("[%i]\n", ret); - buffer[4096] = '\0'; - //printf("[%s]\n", buffer); - //printf("----------------------------------------------------------------\n", buffer); + // test the first read + chunked_reader fh1 = chunked_reader(fs.filename.c_str()); + flush_buffer(buffer, READ_BUFFER_SIZE_F + 1, '\0'); + ret = fs.view_fasta_chunk(cache_p40, buffer, 4096, 0, fh1); + //printf("[%i]\n", ret); + buffer[4096] = '\0'; + //printf("[%s]\n", buffer); + //printf("----------------------------------------------------------------\n", buffer); - // test the first read - flush_buffer(buffer, READ_BUFFER_SIZE_F + 1, '\0'); - ret = fs.view_fasta_chunk(cache_p40, buffer, 4096, 0); - //printf("[%i]\n", ret); - buffer[4096] = '\0'; - //printf("[%s]\n", buffer); - //printf("----------------------------------------------------------------\n", buffer); + // test 
the first read + flush_buffer(buffer, READ_BUFFER_SIZE_F + 1, '\0'); + ret = fs.view_fasta_chunk(cache_p40, buffer, 4096, 0); + //printf("[%i]\n", ret); + buffer[4096] = '\0'; + //printf("[%s]\n", buffer); + //printf("----------------------------------------------------------------\n", buffer); - // test the first read - //chunked_reader fh2 = chunked_reader(fs.filename.c_str()); - flush_buffer(buffer, READ_BUFFER_SIZE_F + 1, '\0'); - ret = fs.view_fasta_chunk(cache_p40, buffer, 4096, 20480, fh1); - //printf("[%i]\n", ret); - buffer[4096] = '\0'; - //printf("[%s]\n", buffer); - //printf("----------------------------------------------------------------\n", buffer); + // test the first read + //chunked_reader fh2 = chunked_reader(fs.filename.c_str()); + flush_buffer(buffer, READ_BUFFER_SIZE_F + 1, '\0'); + ret = fs.view_fasta_chunk(cache_p40, buffer, 4096, 20480, fh1); + //printf("[%i]\n", ret); + buffer[4096] = '\0'; + //printf("[%s]\n", buffer); + //printf("----------------------------------------------------------------\n", buffer); - // test the first read - flush_buffer(buffer, READ_BUFFER_SIZE_F + 1, '\0'); - ret = fs.view_fasta_chunk(cache_p40, buffer, 4096, 20480); - //printf("[%i]\n", ret); - buffer[4096] = '\0'; - //printf("[%s]\n", buffer); - //printf("----------------------------------------------------------------\n", buffer); + // test the first read + flush_buffer(buffer, READ_BUFFER_SIZE_F + 1, '\0'); + ret = fs.view_fasta_chunk(cache_p40, buffer, 4096, 20480); + //printf("[%i]\n", ret); + buffer[4096] = '\0'; + //printf("[%s]\n", buffer); + //printf("----------------------------------------------------------------\n", buffer); - delete cache_p40; - delete[] buffer; - * - */ + delete cache_p40; + delete[] buffer; + * + */ } diff --git a/test/fastafs/test_ucsc2bit.cpp b/test/fastafs/test_ucsc2bit.cpp index 025df492..63df271e 100644 --- a/test/fastafs/test_ucsc2bit.cpp +++ b/test/fastafs/test_ucsc2bit.cpp @@ -404,8 +404,10 @@ 
BOOST_AUTO_TEST_CASE(test_fastafs_view_chunked_2bit_with_offset) fastafs fs = fastafs("test"); fs.load(fastafs_file); BOOST_REQUIRE(fs.data.size() > 0); + std::ifstream file(fs.filename.c_str(), std::ios::in | std::ios::binary | std::ios::ate); BOOST_REQUIRE(file.is_open()); + // check ucsc2bit header: char buffer[1024 + 1]; static std::string reference = UCSC2BIT_MAGIC + UCSC2BIT_VERSION + "\x07\x00\x00\x00"s "\x00\x00\x00\x00"s // literals bypass a char* conversion and preserve nullbytes @@ -459,6 +461,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs_view_chunked_2bit_with_offset) "\x00\x00\x00\x00"s "\x09\x30" // NNAC TG?? = 00001001 00110000 ; + uint32_t complen; // voor lengte 1...(245-1) // voor i = 0, 245-lengte @@ -467,6 +470,8 @@ BOOST_AUTO_TEST_CASE(test_fastafs_view_chunked_2bit_with_offset) fs.view_ucsc2bit_chunk(buffer, complen, file_offset); BOOST_CHECK_EQUAL_MESSAGE(reference.compare(file_offset, complen, std_string_nullbyte_safe(buffer, 0, complen), 0, complen), 0, "Failed during len=" << complen << " and file offset=" << file_offset); } + + printf("\n"); } //for(uint32_t i = 0; i < complen; i++) { //printf("ref[%i]: %u\t == buf[%i]: %u",i + file_offset, (signed char) reference[i + file_offset], i, (signed char) buffer[i], (unsigned char) buffer[i]); diff --git a/test/fivebit_fivebytes/test_fivebit_fivebytes.cpp b/test/fivebit_fivebytes/test_fivebit_fivebytes.cpp index bc56bd2d..060b2366 100644 --- a/test/fivebit_fivebytes/test_fivebit_fivebytes.cpp +++ b/test/fivebit_fivebytes/test_fivebit_fivebytes.cpp @@ -99,7 +99,7 @@ BOOST_AUTO_TEST_CASE(test_fivebit_fivebytes_conversions) -BOOST_AUTO_TEST_CASE(test_dict_conv) +BOOST_AUTO_TEST_CASE(test_fivebit_fivebytes__dict_conversions) { char hash[255]; hash['A'] = 0; @@ -141,7 +141,7 @@ BOOST_AUTO_TEST_CASE(test_dict_conv) for(size_t i = 0; i < dict.size(); i++) { // set and compress amino acid string for(size_t j = 0 ; j < 8; j ++) { - f.set((unsigned char) j, hash[ dict[i][j] ]) ; + f.set((unsigned char) j, 
(unsigned char) hash[(unsigned char) dict[i][j] ]) ; } // decompress diff --git a/test/flags/test_flags.cpp b/test/flags/test_flags.cpp index 51e9a2a8..81f5ff22 100644 --- a/test/flags/test_flags.cpp +++ b/test/flags/test_flags.cpp @@ -18,7 +18,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs_flags) { fastafs_flags f; - char buffer[2 + 1]; + unsigned char buffer[2 + 1]; buffer[2] = '\0'; // test: 00000000 00000000 diff --git a/test/sequenceregion/test_sequenceregion.cpp b/test/sequenceregion/test_sequenceregion.cpp index 436338a1..c46cb63b 100644 --- a/test/sequenceregion/test_sequenceregion.cpp +++ b/test/sequenceregion/test_sequenceregion.cpp @@ -32,49 +32,49 @@ BOOST_AUTO_TEST_CASE(test_sequence_region) char arg[] = "/seq/chr1"; sequence_region sr = sequence_region(&(arg[5])); - BOOST_CHECK_EQUAL(sr.seq_name, "chr1"); - BOOST_CHECK_EQUAL(sr.has_defined_end, false); // not defined; sequence's end + BOOST_CHECK_EQUAL(sr.get_seq_name(), "chr1"); + BOOST_CHECK_EQUAL(sr.has_defined_end(), false); // not defined; sequence's end } { char arg[] = "/seq/chr1:"; sequence_region sr = sequence_region(&(arg[5])); - BOOST_CHECK_EQUAL(sr.seq_name, "chr1"); - BOOST_CHECK_EQUAL(sr.has_defined_end, false); // not defined; sequence's end + BOOST_CHECK_EQUAL(sr.get_seq_name(), "chr1"); + BOOST_CHECK_EQUAL(sr.has_defined_end(), false); // not defined; sequence's end } { char arg[] = "/seq/chr1:123"; sequence_region sr = sequence_region(&(arg[5])); - BOOST_CHECK_EQUAL(sr.seq_name, "chr1"); - BOOST_CHECK_EQUAL(sr.start, 123); + BOOST_CHECK_EQUAL(sr.get_seq_name(), "chr1"); + BOOST_CHECK_EQUAL(sr.get_start_position(), 123); - BOOST_CHECK_EQUAL(sr.has_defined_end, true); - BOOST_CHECK_EQUAL(sr.end, 123); + BOOST_CHECK_EQUAL(sr.has_defined_end(), true); + BOOST_CHECK_EQUAL(sr.get_end_position(), 123); } { char arg[] = "/seq/chr1:-123"; sequence_region sr = sequence_region(&(arg[5])); - BOOST_CHECK_EQUAL(sr.seq_name, "chr1"); - BOOST_CHECK_EQUAL(sr.start, 0); + BOOST_CHECK_EQUAL(sr.get_seq_name(), 
"chr1"); + BOOST_CHECK_EQUAL(sr.get_start_position(), 0); - BOOST_CHECK_EQUAL(sr.has_defined_end, true); - BOOST_CHECK_EQUAL(sr.end, 123); + BOOST_CHECK_EQUAL(sr.has_defined_end(), true); + BOOST_CHECK_EQUAL(sr.get_end_position(), 123); } { char arg[] = "/seq/chr1:123-456"; sequence_region sr = sequence_region(&(arg[5])); - BOOST_CHECK_EQUAL(sr.seq_name, "chr1"); - BOOST_CHECK_EQUAL(sr.start, 123); + BOOST_CHECK_EQUAL(sr.get_seq_name(), "chr1"); + BOOST_CHECK_EQUAL(sr.get_start_position(), 123); - BOOST_CHECK_EQUAL(sr.has_defined_end, true); - BOOST_CHECK_EQUAL(sr.end, 456); + BOOST_CHECK_EQUAL(sr.has_defined_end(), true); + BOOST_CHECK_EQUAL(sr.get_end_position(), 456); } @@ -82,10 +82,10 @@ BOOST_AUTO_TEST_CASE(test_sequence_region) char arg[] = "/seq/chr1:123-"; sequence_region sr = sequence_region(&(arg[5])); - BOOST_CHECK_EQUAL(sr.seq_name, "chr1"); - BOOST_CHECK_EQUAL(sr.start, 123); + BOOST_CHECK_EQUAL(sr.get_seq_name(), "chr1"); + BOOST_CHECK_EQUAL(sr.get_start_position(), 123); - BOOST_CHECK_EQUAL(sr.has_defined_end, false); + BOOST_CHECK_EQUAL(sr.has_defined_end(), false); //BOOST_CHECK_EQUAL(sr.end , 456); - underfined } @@ -94,7 +94,9 @@ BOOST_AUTO_TEST_CASE(test_sequence_region) sequence_region *sr = nullptr; if(sr == nullptr) {// compiler doesn't understand this otherwise +#if DEBUG BOOST_CHECK_THROW(sr = new sequence_region(&(arg[5])), std::invalid_argument); +#endif //DEBUG } } @@ -108,49 +110,49 @@ BOOST_AUTO_TEST_CASE(test_sequence_region3) char arg[] = "/seq/chrRr1"; sequence_region sr = sequence_region(&(arg[5])); - BOOST_CHECK_EQUAL(sr.seq_name, "chrRr1"); - BOOST_CHECK_EQUAL(sr.has_defined_end, false); // not defined; sequence's end + BOOST_CHECK_EQUAL(sr.get_seq_name(), "chrRr1"); + BOOST_CHECK_EQUAL(sr.has_defined_end(), false); // not defined; sequence's end } { char arg[] = "/seq/chrRr1:"; sequence_region sr = sequence_region(&(arg[5])); - BOOST_CHECK_EQUAL(sr.seq_name, "chrRr1"); - BOOST_CHECK_EQUAL(sr.has_defined_end, false); // not 
defined; sequence's end + BOOST_CHECK_EQUAL(sr.get_seq_name(), "chrRr1"); + BOOST_CHECK_EQUAL(sr.has_defined_end(), false); // not defined; sequence's end } { char arg[] = "/seq/chrRr1:1234"; sequence_region sr = sequence_region(&(arg[5])); - BOOST_CHECK_EQUAL(sr.seq_name, "chrRr1"); - BOOST_CHECK_EQUAL(sr.start, 1234); + BOOST_CHECK_EQUAL(sr.get_seq_name(), "chrRr1"); + BOOST_CHECK_EQUAL(sr.get_start_position(), 1234); - BOOST_CHECK_EQUAL(sr.has_defined_end, true); - BOOST_CHECK_EQUAL(sr.end, 1234); + BOOST_CHECK_EQUAL(sr.has_defined_end(), true); + BOOST_CHECK_EQUAL(sr.get_end_position(), 1234); } { char arg[] = "/seq/chrRr1:-1234"; sequence_region sr = sequence_region(&(arg[5])); - BOOST_CHECK_EQUAL(sr.seq_name, "chrRr1"); - BOOST_CHECK_EQUAL(sr.start, 0); + BOOST_CHECK_EQUAL(sr.get_seq_name(), "chrRr1"); + BOOST_CHECK_EQUAL(sr.get_start_position(), 0); - BOOST_CHECK_EQUAL(sr.has_defined_end, true); - BOOST_CHECK_EQUAL(sr.end, 1234); + BOOST_CHECK_EQUAL(sr.has_defined_end(), true); + BOOST_CHECK_EQUAL(sr.get_end_position(), 1234); } { char arg[] = "/seq/chrRr1:1234-1235"; sequence_region sr = sequence_region(&(arg[5])); - BOOST_CHECK_EQUAL(sr.seq_name, "chrRr1"); - BOOST_CHECK_EQUAL(sr.start, 1234); + BOOST_CHECK_EQUAL(sr.get_seq_name(), "chrRr1"); + BOOST_CHECK_EQUAL(sr.get_start_position(), 1234); - BOOST_CHECK_EQUAL(sr.has_defined_end, true); - BOOST_CHECK_EQUAL(sr.end, 1235); + BOOST_CHECK_EQUAL(sr.has_defined_end(), true); + BOOST_CHECK_EQUAL(sr.get_end_position(), 1235); } @@ -158,10 +160,10 @@ BOOST_AUTO_TEST_CASE(test_sequence_region3) char arg[] = "/seq/chrRr1:1234-"; sequence_region sr = sequence_region(&(arg[5])); - BOOST_CHECK_EQUAL(sr.seq_name, "chrRr1"); - BOOST_CHECK_EQUAL(sr.start, 1234); + BOOST_CHECK_EQUAL(sr.get_seq_name(), "chrRr1"); + BOOST_CHECK_EQUAL(sr.get_start_position(), 1234); - BOOST_CHECK_EQUAL(sr.has_defined_end, false); + BOOST_CHECK_EQUAL(sr.has_defined_end(), false); //BOOST_CHECK_EQUAL(sr.end , 1235); - underfined } @@ -171,7 
+173,9 @@ BOOST_AUTO_TEST_CASE(test_sequence_region3) if(sr == nullptr) {// compiler doesn't understand this otherwise char arg[] = "/seq/chrRr1:1235-1234"; +#if DEBUG BOOST_CHECK_THROW(sr = new sequence_region(&(arg[5])), std::invalid_argument); +#endif //DEBUG } } @@ -186,49 +190,49 @@ BOOST_AUTO_TEST_CASE(test_sequence_region2) char arg[] = "/seq/chrRr1"; sequence_region sr = sequence_region(&(arg[5])); - BOOST_CHECK_EQUAL(sr.seq_name, "chrRr1"); - BOOST_CHECK_EQUAL(sr.has_defined_end, false); // not defined; sequence's end + BOOST_CHECK_EQUAL(sr.get_seq_name(), "chrRr1"); + BOOST_CHECK_EQUAL(sr.has_defined_end(), false); // not defined; sequence's end } { char arg[] = "/seq/chrRr1:"; sequence_region sr = sequence_region(&(arg[5])); - BOOST_CHECK_EQUAL(sr.seq_name, "chrRr1"); - BOOST_CHECK_EQUAL(sr.has_defined_end, false); // not defined; sequence's end + BOOST_CHECK_EQUAL(sr.get_seq_name(), "chrRr1"); + BOOST_CHECK_EQUAL(sr.has_defined_end(), false); // not defined; sequence's end } { char arg[] = "/seq/chrRr1:123"; sequence_region sr = sequence_region(&(arg[5])); - BOOST_CHECK_EQUAL(sr.seq_name, "chrRr1"); - BOOST_CHECK_EQUAL(sr.start, 123); + BOOST_CHECK_EQUAL(sr.get_seq_name(), "chrRr1"); + BOOST_CHECK_EQUAL(sr.get_start_position(), 123); - BOOST_CHECK_EQUAL(sr.has_defined_end, true); - BOOST_CHECK_EQUAL(sr.end, 123); + BOOST_CHECK_EQUAL(sr.has_defined_end(), true); + BOOST_CHECK_EQUAL(sr.get_end_position(), 123); } { char arg[] = "/seq/chrRr1:-123"; sequence_region sr = sequence_region(&(arg[5])); - BOOST_CHECK_EQUAL(sr.seq_name, "chrRr1"); - BOOST_CHECK_EQUAL(sr.start, 0); + BOOST_CHECK_EQUAL(sr.get_seq_name(), "chrRr1"); + BOOST_CHECK_EQUAL(sr.get_start_position(), 0); - BOOST_CHECK_EQUAL(sr.has_defined_end, true); - BOOST_CHECK_EQUAL(sr.end, 123); + BOOST_CHECK_EQUAL(sr.has_defined_end(), true); + BOOST_CHECK_EQUAL(sr.get_end_position(), 123); } { char arg[] = "/seq/chrRr1:123-456"; sequence_region sr = sequence_region(&(arg[5])); - 
BOOST_CHECK_EQUAL(sr.seq_name, "chrRr1"); - BOOST_CHECK_EQUAL(sr.start, 123); + BOOST_CHECK_EQUAL(sr.get_seq_name(), "chrRr1"); + BOOST_CHECK_EQUAL(sr.get_start_position(), 123); - BOOST_CHECK_EQUAL(sr.has_defined_end, true); - BOOST_CHECK_EQUAL(sr.end, 456); + BOOST_CHECK_EQUAL(sr.has_defined_end(), true); + BOOST_CHECK_EQUAL(sr.get_end_position(), 456); } @@ -236,10 +240,10 @@ BOOST_AUTO_TEST_CASE(test_sequence_region2) char arg[] = "/seq/chrRr1:123-"; sequence_region sr = sequence_region(&(arg[5])); - BOOST_CHECK_EQUAL(sr.seq_name, "chrRr1"); - BOOST_CHECK_EQUAL(sr.start, 123); + BOOST_CHECK_EQUAL(sr.get_seq_name(), "chrRr1"); + BOOST_CHECK_EQUAL(sr.get_start_position(), 123); - BOOST_CHECK_EQUAL(sr.has_defined_end, false); + BOOST_CHECK_EQUAL(sr.has_defined_end(), false); //BOOST_CHECK_EQUAL(sr.end , 456); - underfined } @@ -248,7 +252,9 @@ BOOST_AUTO_TEST_CASE(test_sequence_region2) sequence_region *sr = nullptr; if(sr == nullptr) {// compiler doesn't understand this otherwise +#if DEBUG BOOST_CHECK_THROW(sr = new sequence_region(&(arg[5])), std::invalid_argument); +#endif //DEBUG } } diff --git a/test/utils/test_utils.cpp b/test/utils/test_utils.cpp index 50dc2564..50fd919e 100644 --- a/test/utils/test_utils.cpp +++ b/test/utils/test_utils.cpp @@ -16,7 +16,7 @@ BOOST_AUTO_TEST_SUITE(Testing) /** * @description test contains a sequence that intially failed chunked_view with chunk size > 1 */ -BOOST_AUTO_TEST_CASE(test__twobytes_to_uint) +BOOST_AUTO_TEST_CASE(test_utils__twobytes_to_uint) { { // test: 00000000 00000000 = 0 diff --git a/test/view/test_view.cpp b/test/view/test_view.cpp index f2f9479b..cc8fbe67 100644 --- a/test/view/test_view.cpp +++ b/test/view/test_view.cpp @@ -28,7 +28,7 @@ static int test_i = 0; BOOST_AUTO_TEST_CASE(test_fastafs_seq_static_func) { - printf("test %i\n",++test_i); + printf("test %i\n", ++test_i); /* padding=4, offset=0, position_until=0, 1, 2, 3: 0 "A" "AC" "ACT" "ACTG" padding=4, offset=0, position_until=4, 5, 6, 7, 8: 1 
"ACTG\n" "ACTG\nA" "ACTG\nAA" "ACTG\nAAA" "ACTG\nAAAA" @@ -135,7 +135,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs_seq_static_func) BOOST_AUTO_TEST_CASE(test_fastafs_twobit_offset_calc) { - printf("test %i\n",++test_i); + printf("test %i\n", ++test_i); // testing "ACTGACTGNNNNACTG" uint32_t num_Ns; // number of N's until certain nucleotide is reached @@ -184,7 +184,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs_twobit_offset_calc) */ BOOST_AUTO_TEST_CASE(test_chunked_viewing) { - printf("test %i\n",++test_i); + printf("test %i\n", ++test_i); uint32_t written; @@ -336,6 +336,7 @@ BOOST_AUTO_TEST_CASE(test_chunked_viewing) std::string full_file = ">chr1\nTTTT\nCCCC\nAAAA\nGGGG\n>chr2\nACTG\nACTG\nNNNN\nACTG\n>chr3.1\nACTG\nACTG\nAAAA\nC\n>chr3.2\nACTG\nACTG\nAAAA\nCC\n>chr3.3\nACTG\nACTG\nAAAA\nCCC\n>chr4\nACTG\nNNNN\n>chr5\nNNAC\nTG\n"; //std::string full_file = ">chr1 TTTT CCCC AAAA GGGG >chr2 ACTG ACTG NNNN ACTG >chr3.1 ACTG ACTG AAAA C >chr3.2 ACTG ACTG AAAA CC >chr3.3 ACTG ACTG AAAA CCC >chr4 ACTG NNNN >chr5 NNAC TG "; chunked_reader fhc = chunked_reader(fs.filename.c_str()); + fhc.fopen(0); for(uint32_t offset = 0; offset < 62; ++offset) { std::string substr_file = full_file.substr(offset, 100); @@ -360,7 +361,7 @@ BOOST_AUTO_TEST_CASE(test_chunked_viewing) BOOST_AUTO_TEST_CASE(test_chunked_viewing_sub) { - printf("test %i\n",++test_i); + printf("test %i\n", ++test_i); uint32_t written; std::string test_name = "test"; @@ -380,6 +381,7 @@ BOOST_AUTO_TEST_CASE(test_chunked_viewing_sub) // test fastafs_seq functions //std::ifstream fh(fastafs_file.c_str(), std::ios::in | std::ios::binary | std::ios::ate); chunked_reader fh = chunked_reader(fastafs_file.c_str()); + fh.fopen(0); //BOOST_REQUIRE(fh.is_open()); // 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 @@ -405,7 +407,7 @@ BOOST_AUTO_TEST_CASE(test_chunked_viewing_sub) BOOST_AUTO_TEST_CASE(test_chunked_viewing_fourbit) { - printf("test %i\n",++test_i); + printf("test %i\n", ++test_i); std::string test_name = 
"test_004"; std::string fasta_file = "test/data/" + test_name + ".fa"; @@ -521,6 +523,7 @@ BOOST_AUTO_TEST_CASE(test_chunked_viewing_fourbit) std::string full_file = ">IUPAC\nNBKA\nHMDC\nUWGS\nYVTR\nHGWV\nUMTB\nSDN-\n----\n----\n-BGY\nADNH\nSMUT\nRCKW\nVsbh\nvdnr\ntgyc\nmkwu\naAVT\nSDKN\nB---\nUGWM\nHYRC\n";// length = 117 chunked_reader fhc = chunked_reader(fs.filename.c_str()); + fhc.fopen(0); for(uint32_t offset = 0; offset < 62; ++offset) { std::string substr_file = full_file.substr(offset, 200); @@ -547,7 +550,8 @@ BOOST_AUTO_TEST_CASE(test_chunked_viewing_fourbit) // it can return less bytes than the buffer_size BOOST_AUTO_TEST_CASE(test_chunked_viewing_buffermaxlen) { - printf("test %i\n",++test_i); +#if DEBUG + printf("test %i\n", ++test_i); BOOST_REQUIRE_EQUAL(READ_BUFFER_SIZE, 4096);// required for this test @@ -573,6 +577,11 @@ BOOST_AUTO_TEST_CASE(test_chunked_viewing_buffermaxlen) delete[] buffer; delete cache_p0; + +#else + // for this test a small buffer size is needed, only used for debugging - therefore always test with debug on +#endif //DEBUG + } @@ -581,7 +590,9 @@ BOOST_AUTO_TEST_CASE(test_chunked_viewing_buffermaxlen) // it can return less bytes than the buffer_size BOOST_AUTO_TEST_CASE(test_chunked_viewing_buffermaxlen_lim) { - printf("test %i\n",++test_i); +#if DEBUG + + printf("test %i\n", ++test_i); BOOST_REQUIRE_EQUAL(READ_BUFFER_SIZE, 4096);// required for this test @@ -607,6 +618,11 @@ BOOST_AUTO_TEST_CASE(test_chunked_viewing_buffermaxlen_lim) delete[] buffer; delete cache_p0; + +#else + // for this test a small buffer size is needed, only used for debugging - therefore always test with debug on +#endif //DEBUG + } @@ -616,7 +632,9 @@ BOOST_AUTO_TEST_CASE(test_chunked_viewing_buffermaxlen_lim) // it can return less bytes than the buffer_size BOOST_AUTO_TEST_CASE(test_chunked_viewing_buffermaxlen2) { - printf("test %i\n",++test_i); +#if DEBUG + + printf("test %i\n", ++test_i); BOOST_REQUIRE_EQUAL(READ_BUFFER_SIZE, 4096);// required for 
this test @@ -649,6 +667,10 @@ BOOST_AUTO_TEST_CASE(test_chunked_viewing_buffermaxlen2) delete[] buffer; delete cache_p72; + +#else + // for this test a small buffer size is needed, only used for debugging - therefore always test with debug on +#endif //DEBUG } @@ -656,7 +678,7 @@ BOOST_AUTO_TEST_CASE(test_chunked_viewing_buffermaxlen2) BOOST_AUTO_TEST_CASE(test_chunked_viewing_zstd) { - printf("test %i\n",++test_i); + printf("test %i\n", ++test_i); std::string test_name = "test"; @@ -845,7 +867,7 @@ BOOST_AUTO_TEST_CASE(test_chunked_viewing_zstd) BOOST_AUTO_TEST_CASE(test_chunked_viewing2) { - printf("test %i\n",++test_i); + printf("test %i\n", ++test_i); std::string test_name = "test_003"; std::string fasta_file = "test/data/" + test_name + ".fa"; @@ -883,6 +905,7 @@ BOOST_AUTO_TEST_CASE(test_chunked_viewing2) ffs2f_init* cache = fs.init_ffs2f(60, true); chunked_reader fhc = chunked_reader(fs.filename.c_str()); + fhc.fopen(0); /* maak alle substrings: [....] @@ -899,8 +922,8 @@ BOOST_AUTO_TEST_CASE(test_chunked_viewing2) size_t n = full_file.size(); uint32_t start_pos = 0; for(float i = 0.0; i <= 12.0; i += 1) { // perform limited subset of tests - start_pos = (uint32_t) ((i/12.0) * (double) n); - printf(" - %uli / %zu\n",start_pos, n); + start_pos = (uint32_t)((i / 12.0) * (double) n); + printf(" - %u / %zu\n", start_pos, n); for(uint32_t buffer_len = (uint32_t) full_file.size() - start_pos; buffer_len > 0; buffer_len--) { std::string substr_file = std::string(full_file, start_pos, buffer_len); diff --git a/tmp/database/index b/tmp/database/index new file mode 100644 index 00000000..e69de29b