From 15065e5a9ad5cd6c4528ec8911bfeef1b71c1c08 Mon Sep 17 00:00:00 2001 From: yhoogstrate Date: Sun, 22 Jan 2023 14:59:19 +0100 Subject: [PATCH] updates --- include/chunked_reader.hpp | 13 +- src/chunked_reader.cpp | 135 ++++----- src/fastafs.cpp | 2 +- src/ucsc2bit_to_fastafs.cpp | 2 +- src/xbit_byte_encoder.cpp | 2 +- test/chunked_reader/test_chunked_reader.cpp | 291 +++++++++++++------- test/fastafs/test_fastafs.cpp | 2 +- test/fastafs/test_ucsc2bit.cpp | 6 +- test/view/test_view.cpp | 2 +- 9 files changed, 254 insertions(+), 201 deletions(-) diff --git a/include/chunked_reader.hpp b/include/chunked_reader.hpp index 1c322dc7..d47b7533 100644 --- a/include/chunked_reader.hpp +++ b/include/chunked_reader.hpp @@ -38,9 +38,9 @@ class chunked_reader_old // dit is Context ZSTD_seekable_decompress_init_data* fh_zstd; std::string filename; // try doing this with inode - + compression_type filetype; - + char buffer[READ_BUFFER_SIZE + 1]; size_t buffer_i; size_t buffer_n; @@ -62,7 +62,10 @@ class chunked_reader_old // dit is Context void find_filetype(); void set_filetype(compression_type); - compression_type get_filetype() { return this->filetype ; }; + compression_type get_filetype() + { + return this->filetype ; + }; size_t read(char *, size_t);// @deprecate size_t read(unsigned char *, size_t); @@ -113,7 +116,7 @@ class ContextZstdSeekable : public State { private: ZSTD_seekable_decompress_init_data* fh = nullptr; - + size_t const buffOutSize = ZSTD_DStreamOutSize(); char* const buffOut = (char*) malloc_orDie(buffOutSize); ZSTD_seekable* const seekable = ZSTD_seekable_create(); //@todo -> in constructor, check if not NULL @@ -147,7 +150,7 @@ class chunked_reader // master chunked_reader void TransitionTo(State *); // @todo rename to set_compression_type chunked_reader(const char *) ; ~chunked_reader(); - + State* find_state(); const std::type_info& typeid_state(); diff --git a/src/chunked_reader.cpp b/src/chunked_reader.cpp index 7cf46cf8..6d138c79 100644 --- a/src/chunked_reader.cpp +++ b/src/chunked_reader.cpp @@ -259,30 +259,26 @@ void State::set_context(chunked_reader *arg_context) // This does not read the actual flat file, this copies its internal buffer to arg_buffer_to size_t State::read(unsigned char *arg_buffer_to, size_t arg_buffer_to_size, - size_t &buffer_i, size_t &buffer_n) + size_t &buffer_i, size_t &buffer_n) { #if DEBUG - if(arg_buffer_to_size > READ_BUFFER_SIZE) - { + if(arg_buffer_to_size > READ_BUFFER_SIZE) { throw std::runtime_error("[ContextUncompressed::read] Requested buffer size larger than internal context buffer.\n"); } #endif //DEBUG size_t written = 0; const size_t n1 = std::min(buffer_n - buffer_i, arg_buffer_to_size);// number of characters to copy - + // copy current internal buffer completely - while(written < n1) - { + while(written < n1) { arg_buffer_to[written++] = this->context->get_buffer()[buffer_i++]; } - if(written < arg_buffer_to_size) - { + if(written < arg_buffer_to_size) { this->context->cache_buffer();// needs to set n to 0 - while(buffer_i < buffer_n and written < arg_buffer_to_size) - { + while(buffer_i < buffer_n and written < arg_buffer_to_size) { arg_buffer_to[written++] = this->context->get_buffer()[buffer_i++]; } } @@ -321,16 +317,11 @@ char * chunked_reader::get_buffer() //@todo remove and use typeid only compression_type chunked_reader::get_filetype() { - if(this->typeid_state() == typeid(ContextUncompressed)) - { + if(this->typeid_state() == typeid(ContextUncompressed)) { return compression_type::uncompressed; - } - else if(this->typeid_state() == typeid(ContextZstdSeekable)) - { + } else if(this->typeid_state() == typeid(ContextZstdSeekable)) { return compression_type::zstd; - } - else - { + } else { return compression_type::undefined; } } @@ -345,7 +336,7 @@ size_t chunked_reader::cache_buffer() this->buffer_i = 0; this->file_i += s; - + return s; } @@ -353,28 +344,26 @@ size_t chunked_reader::read(unsigned char *arg_buffer, size_t arg_buffer_size) { //arg_buffer_size = std::min(arg_buffer_size, (size_t) READ_BUFFER_SIZE); #if DEBUG - - if(arg_buffer == nullptr) - { + + if(arg_buffer == nullptr) { throw std::runtime_error("[chunked_reader::read] Invalid / not allocated buffer.\n"); } - if(arg_buffer_size > READ_BUFFER_SIZE) - { + if(arg_buffer_size > READ_BUFFER_SIZE) { throw std::runtime_error("[chunked_reader::read] Requested buffer size larger than internal context buffer.\n"); } - + #endif //DEBUG - + return this->state->read(arg_buffer, arg_buffer_size, this->buffer_i, this->buffer_n); } -void chunked_reader::TransitionTo(State *arg_state) { +void chunked_reader::TransitionTo(State *arg_state) +{ - if(this->state != nullptr) - { + if(this->state != nullptr) { delete this->state; // delete and destruct previous state, incl file points, should also run fh.close(); etc. } @@ -401,11 +390,11 @@ void chunked_reader::seek(off_t arg_offset) size_t chunked_reader::tell() { //printf("Context :: tell: %i - %i + %i = %i\n", - //this->file_i , - //this->buffer_n , + //this->file_i , + //this->buffer_n , //this->buffer_i , - //this->file_i - this->buffer_n + this->buffer_i); - + //this->file_i - this->buffer_n + this->buffer_i); + return this->file_i - this->buffer_n + this->buffer_i; } @@ -424,12 +413,9 @@ const std::type_info& chunked_reader::typeid_state() State *chunked_reader::find_state() { - if(is_zstd_file(this->filename.c_str())) - { + if(is_zstd_file(this->filename.c_str())) { return new ContextZstdSeekable; - } - else - { + } else { return new ContextUncompressed; } } @@ -437,24 +423,19 @@ State *chunked_reader::find_state() void ContextUncompressed::fopen(off_t start_pos = 0) { - if(this->fh != nullptr) - { + if(this->fh != nullptr) { throw std::runtime_error("[ContextUncompressed::fopen] opening a non closed reader.\n"); } - + this->fh = new std::ifstream; this->fh->open(this->context->get_filename().c_str(), std::ios::in | std::ios::binary | std::ios::ate); - if(this->fh == nullptr) - { + if(this->fh == nullptr) { throw std::runtime_error("[ContextUncompressed::fopen] empty fh?\n"); } - if(this->fh->is_open()) // @todo move to top-level fopen() - { + if(this->fh->is_open()) { // @todo move to top-level fopen() this->seek(start_pos); - } - else - { + } else { throw std::runtime_error("[chunked_reader_old::init] Cannot open file for reading.\n"); } } @@ -462,13 +443,11 @@ void ContextUncompressed::fopen(off_t start_pos = 0) size_t ContextUncompressed::cache_buffer() { #if DEBUG - if(this->fh->tellg() == -1) - { + if(this->fh->tellg() == -1) { throw std::runtime_error("ContextUncompressed::cache_buffer\n"); } - - if(this->context->get_buffer() == nullptr) - { + + if(this->context->get_buffer() == nullptr) { throw std::runtime_error("ContextUncompressed::cache_buffer - no valid buffer?\n"); } #endif //DEBUG @@ -492,7 +471,7 @@ size_t ContextUncompressed::cache_buffer() this->fh->clear(); this->fh->seekg(0, std::ios::end); } - + return s; } @@ -500,8 +479,7 @@ size_t ContextUncompressed::cache_buffer() void ContextUncompressed::seek(off_t arg_offset) { - if(!this->fh->is_open()) - { + if(!this->fh->is_open()) { throw std::runtime_error("[ContextUncompressed::seek] unexpected closed filehandle found.\n"); } @@ -512,11 +490,9 @@ void ContextUncompressed::seek(off_t arg_offset) ContextUncompressed::~ContextUncompressed() { - if(this->fh != nullptr) - { + if(this->fh != nullptr) { this->fh->close(); - if(!this->fh) - { + if(!this->fh) { std::cerr << "[ContextUncompressed::~ContextUncompressed] unexpected closed filehandle found.\n"; } @@ -531,20 +507,20 @@ size_t ContextZstdSeekable::cache_buffer() { //size_t written = ZSTD_seekable_decompressFile_orDie(this->fh_zstd, this->file_i, this->buffer, this->file_i + READ_BUFFER_SIZE); //this->fh->read(this->context->get_buffer(), READ_BUFFER_SIZE); - + // figure out the location in the decompressed file - + size_t written = ZSTD_seekable_decompressFile_orDie( - this->fh, - this->context->get_file_i(), //this->context->file_i, - this->context->get_buffer(), - this->context->tell() + READ_BUFFER_SIZE //this->context->file_i + READ_BUFFER_SIZE - ); + this->fh, + this->context->get_file_i(), //this->context->file_i, + this->context->get_buffer(), + this->context->tell() + READ_BUFFER_SIZE //this->context->file_i + READ_BUFFER_SIZE + ); //printf("written = %i\n", written); //printf("{{%s}}\n", this->context->get_buffer()); - + /* { #if DEBUG @@ -568,32 +544,31 @@ size_t ContextZstdSeekable::cache_buffer() */ //throw std::runtime_error("[ContextZstdSeekable::cache_buffer] not implemented.\n"); - + return written; } void ContextZstdSeekable::fopen(off_t start_pos) { - if(this->fh != nullptr) - { + if(this->fh != nullptr) { throw std::runtime_error("[ContextZstdSeekable::fopen] opening a non closed reader.\n"); } - - + + this->fh = ZSTD_seekable_decompressFile_init(this->context->get_filename().c_str()); - if((this->fh->fin == NULL) | feof(this->fh->fin)) - { + if((this->fh->fin == NULL) | feof(this->fh->fin)) { throw std::runtime_error("[ContextZstdSeekable::fopen] not implemented.\n"); - } - else - { + } else { fseek_orDie(this->fh->fin, start_pos, SEEK_SET);// set initial file handle to 0? // this->fh->seekg(start_pos, std::ios::beg); size_t const initResult = ZSTD_seekable_initFile(this->seekable, fh->fin); - if (ZSTD_isError(initResult)) { fprintf(stderr, "ZSTD_seekable_init() error : %s \n", ZSTD_getErrorName(initResult)); exit(11); } + if(ZSTD_isError(initResult)) { + fprintf(stderr, "ZSTD_seekable_init() error : %s \n", ZSTD_getErrorName(initResult)); + exit(11); + } //@todo class member? this->maxFileSize = ZSTD_seekable_getFileDecompressedSize(this->seekable); @@ -616,6 +591,6 @@ ContextZstdSeekable::~ContextZstdSeekable() delete this->fh; } - + //throw std::runtime_error("[ContextUncompressed::~ContextUncompressed] not implemented.\n"); } diff --git a/src/fastafs.cpp b/src/fastafs.cpp index 0a696cf3..20485bbd 100644 --- a/src/fastafs.cpp +++ b/src/fastafs.cpp @@ -641,7 +641,7 @@ void fastafs::load(std::string afilename) { std::streampos size; unsigned char *memblock; - + chunked_reader fh_in = chunked_reader(afilename.c_str()); { fh_in.fopen(0); diff --git a/src/ucsc2bit_to_fastafs.cpp b/src/ucsc2bit_to_fastafs.cpp index a108da11..60c7c618 100644 --- a/src/ucsc2bit_to_fastafs.cpp +++ b/src/ucsc2bit_to_fastafs.cpp @@ -51,7 +51,7 @@ size_t ucsc2bit_to_fastafs(std::string ucsc2bit_file, std::string fastafs_file) fh_fastafs << "\x00\x00\x00\x00"s;// position of metedata ~ unknown YET // Read UCSC2bit header (n seq) - fh_ucsc2bit.read( (char*)( &buffer[0]) , 12);//conversion from unsigned char* to char* (https://stackoverflow.com/questions/604431/c-reading-unsigned-char-from-file-stream) + fh_ucsc2bit.read((char*)(&buffer[0]), 12); //conversion from unsigned char* to char* (https://stackoverflow.com/questions/604431/c-reading-unsigned-char-from-file-stream) n = fourbytes_to_uint_ucsc2bit(buffer, 8); uint_to_fourbytes(buffer, n); std::vector data(n); diff --git a/src/xbit_byte_encoder.cpp b/src/xbit_byte_encoder.cpp index 29384592..f1a2c825 100644 --- a/src/xbit_byte_encoder.cpp +++ b/src/xbit_byte_encoder.cpp @@ -12,7 +12,7 @@ void xbit_byte_encoder::next(chunked_reader &r) unsigned char *buf = new unsigned char[2]; r.read(buf, 1); this->data = buf[0]; - + delete[] buf; } diff --git a/test/chunked_reader/test_chunked_reader.cpp b/test/chunked_reader/test_chunked_reader.cpp index 4faf6582..dfcecc99 100644 --- a/test/chunked_reader/test_chunked_reader.cpp +++ b/test/chunked_reader/test_chunked_reader.cpp @@ -57,13 +57,13 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__small_file) { // old init chunked_reader_old r_flat = chunked_reader_old(fastafs_file.c_str()); - + // Context equivalent - uncompressed chunked_reader c1(fastafs_file.c_str()); c1.fopen(0); BOOST_CHECK(c1.typeid_state() == typeid(ContextUncompressed)); BOOST_CHECK(c1.typeid_state() != typeid(ContextZstdSeekable)); - + // Context equivalent - compressed chunked_reader c2(fastafs_file_zstd.c_str()); c2.fopen(0); @@ -93,7 +93,7 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__small_file) BOOST_CHECK(c1.typeid_state() == typeid(ContextUncompressed)); BOOST_CHECK(c1.typeid_state() != typeid(ContextZstdSeekable)); } - + // Context equivalent - compressed zstd { BOOST_CHECK_EQUAL(c2.tell(), 0); @@ -135,7 +135,7 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__small_file) flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); BOOST_CHECK_EQUAL(c1.tell(), 403); } - + // Context equivalent - compressed zstd { written = c2.read(buffer, 1024); @@ -254,11 +254,11 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__small_file) BOOST_CHECK_EQUAL(std_buffer.size(), reference3.size()); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference3), 0, "Difference in content"); if(std_buffer.compare(reference3) != 0) { - printf("%i != %i\n",reference3.size(), std_buffer.size()); - printf("%s != %s\n",reference3, std_buffer); - - printf("[%u][%u][%u][%u]\n",(unsigned char) reference3[0],reference3[1],reference3[2],reference3[3]); - printf("[%u][%u][%u][%u]\n",(unsigned char) buffer[0],buffer[1],buffer[2],buffer[3]); + printf("%u != %u\n", (unsigned int) reference3.size(), (unsigned int) std_buffer.size()); + printf("%s != %s\n", reference3.c_str(), std_buffer.c_str()); + + printf("[%u][%u][%u][%u]\n", (unsigned char) reference3[0], reference3[1], reference3[2], reference3[3]); + printf("[%u][%u][%u][%u]\n", (unsigned char) buffer[0], buffer[1], buffer[2], buffer[3]); //printf("[%c][%c][%c][%c]\n"); } flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); @@ -266,28 +266,28 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__small_file) //@todo should trigger error!? - r_flat.seek(1024*1024); // trigger out of bound + r_flat.seek(1024 * 1024); // trigger out of bound // Context equivalent - uncompressed { - c1.seek(1024*1024); + c1.seek(1024 * 1024); } // Context equivalent - compressed zstd { - c2.seek(1024*1024); + c2.seek(1024 * 1024); } } { chunked_reader_old r_zstd = chunked_reader_old(fastafs_file_zstd.c_str()); - + // Context equivalent - uncompressed chunked_reader c1(fastafs_file.c_str()); c1.fopen(0); BOOST_CHECK(c1.typeid_state() == typeid(ContextUncompressed)); BOOST_CHECK(c1.typeid_state() != typeid(ContextZstdSeekable)); - + // Context equivalent - compressed chunked_reader c2(fastafs_file_zstd.c_str()); c2.fopen(0); @@ -301,7 +301,7 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__small_file) std_buffer = std::string(reinterpret_cast(&buffer), written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference1), 0, "Difference in content"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - + { written = c1.read(buffer, 1024); BOOST_CHECK_EQUAL(written, 403); @@ -354,12 +354,12 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__small_file) // test seek stuff r_zstd.seek(0); // reset to first pos in file BOOST_REQUIRE_EQUAL(r_zstd.tell(), 0); - + { c1.seek(0); // reset to first pos in file BOOST_REQUIRE_EQUAL(c1.tell(), 0); } - + { c2.seek(0); // reset to first pos in file BOOST_REQUIRE_EQUAL(c2.tell(), 0); @@ -412,7 +412,7 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__small_file) std_buffer = std::string(reinterpret_cast(&buffer), written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference3), 0, "Difference in content"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - + { written = c1.read(buffer, 4); BOOST_CHECK_EQUAL(written, 4); @@ -421,7 +421,7 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__small_file) BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference3), 0, "Difference in content"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); } - + { written = c2.read(buffer, 4); BOOST_CHECK_EQUAL(written, 4); @@ -465,13 +465,13 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__large_file) { chunked_reader_old r_flat = chunked_reader_old(fastafs_file.c_str()); - + // Context equivalent - uncompressed chunked_reader c1(fastafs_file.c_str()); c1.fopen(0); BOOST_CHECK(c1.typeid_state() == typeid(ContextUncompressed)); BOOST_CHECK(c1.typeid_state() != typeid(ContextZstdSeekable)); - + // Context equivalent - compressed chunked_reader c2(fastafs_file_zstd.c_str()); c2.fopen(0); @@ -485,16 +485,18 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__large_file) std_buffer = std::string(reinterpret_cast(&buffer), written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference1), 0, "Difference in content 1st read"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - - { // C1 + + { + // C1 written = c1.read(buffer, 1024); BOOST_CHECK_EQUAL(written, 1024); std_buffer = std::string(reinterpret_cast(&buffer), written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference1), 0, "Difference in content 1st read"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); } - - { // C2 + + { + // C2 written = c2.read(buffer, 1024); BOOST_CHECK_EQUAL(written, 1024); std_buffer = std::string(reinterpret_cast(&buffer), written); @@ -508,16 +510,18 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__large_file) std_buffer = std::string(reinterpret_cast(&buffer), written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference2), 0, "Difference in content 2nd read"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - - { // C1 + + { + // C1 written = c1.read(buffer, 1024); BOOST_CHECK_EQUAL(written, 569); std_buffer = std::string(reinterpret_cast(&buffer), written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference2), 0, "Difference in content 2nd read"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); } - - { // C2 + + { + // C2 written = c2.read(buffer, 1024); BOOST_CHECK_EQUAL(written, 569); std_buffer = std::string(reinterpret_cast(&buffer), written); @@ -528,14 +532,16 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__large_file) written = r_flat.read((char*) &buffer[0], 1024); BOOST_CHECK_EQUAL(written, 0); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - - { // C1 + + { + // C1 written = c1.read(buffer, 1024); BOOST_CHECK_EQUAL(written, 0); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); } - - { // C2 + + { + // C2 written = c2.read(buffer, 1024); BOOST_CHECK_EQUAL(written, 0); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); @@ -545,111 +551,170 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__large_file) BOOST_CHECK_EQUAL(written, 0); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + { + // C1 + written = c1.read(buffer, 1024); + BOOST_CHECK_EQUAL(written, 0); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } + + { + // C2 + written = c2.read(buffer, 1024); + BOOST_CHECK_EQUAL(written, 0); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } + // set back r_flat.seek(1024); + { + // C1 + c1.seek(1024); + } + + { + // C2 + c2.seek(1024); + } + + written = r_flat.read((char*) &buffer[0], 1024); BOOST_CHECK_EQUAL(written, 569); std_buffer = std::string(reinterpret_cast(&buffer), written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference2), 0, "Difference in content 2nd read"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + { // C1 + written = c1.read(buffer, 1024); + BOOST_CHECK_EQUAL(written, 569); + std_buffer = std::string(reinterpret_cast(&buffer), written); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference2), 0, "Difference in content 2nd read"); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } + + { // C2 + written = c2.read(buffer, 1024); + BOOST_CHECK_EQUAL(written, 569); + std_buffer = std::string(reinterpret_cast(&buffer), written); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference2), 0, "Difference in content 2nd read"); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } + // set back r_flat.seek(4); + { // C1 + c1.seek(4); + } + + { // C2 + c2.seek(4); + } + written = r_flat.read((char*) &buffer[0], 1024);// reads across two buffers? BOOST_CHECK_EQUAL(written, 1024); std_buffer = std::string(reinterpret_cast(&buffer), written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference3), 0, "Difference in content 2nd read"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - written = r_flat.read((char*) &buffer[0], 1024);// reads across two buffers? - BOOST_CHECK_EQUAL(written, 565); - std_buffer = std::string(reinterpret_cast(&buffer), written); - BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference4), 0, "Difference in content 2nd read"); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + { // C1 + written = c1.read(buffer, 1024);// reads across two buffers? + BOOST_CHECK_EQUAL(written, 1024); + std_buffer = std::string(reinterpret_cast(&buffer), written); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference3), 0, "Difference in content 2nd read"); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } + { // C2 + written = c2.read(buffer, 1024);// reads across two buffers? + BOOST_CHECK_EQUAL(written, 1024); + std_buffer = std::string(reinterpret_cast(&buffer), written); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference3), 0, "Difference in content 2nd read"); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } - r_flat.seek(4); - - written = r_flat.read((char*) &buffer[0], 4);// reads across two buffers? - BOOST_CHECK_EQUAL(written, 4); - std_buffer = std::string(reinterpret_cast(&buffer), written); - BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference5), 0, "Difference in content 2nd read"); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); written = r_flat.read((char*) &buffer[0], 1024);// reads across two buffers? - BOOST_CHECK_EQUAL(written, 1024); - std_buffer = std::string(reinterpret_cast(&buffer), written); - BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference6), 0, "Difference in content 2nd read"); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - } - - { - chunked_reader_old r_zstd = chunked_reader_old(fastafs_file_zstd.c_str()); - - written = r_zstd.read(buffer, 1024); - BOOST_CHECK_EQUAL(written, 1024); - std_buffer = std::string(reinterpret_cast(&buffer), written); - BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference1), 0, "Difference in content 1st read"); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - - written = r_zstd.read(buffer, 1024); - BOOST_CHECK_EQUAL(written, 569); + BOOST_CHECK_EQUAL(written, 565); std_buffer = std::string(reinterpret_cast(&buffer), written); - BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference2), 0, "Difference in content 2nd read"); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - - written = r_zstd.read(buffer, 1024); - BOOST_CHECK_EQUAL(written, 0); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference4), 0, "Difference in content 2nd read"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - written = r_zstd.read(buffer, 1024); - BOOST_CHECK_EQUAL(written, 0); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + { // C1 + written = c1.read(buffer, 1024);// reads across two buffers? + BOOST_CHECK_EQUAL(written, 565); + std_buffer = std::string(reinterpret_cast(&buffer), written); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference4), 0, "Difference in content 2nd read"); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } + { // C2 + written = c2.read(buffer, 1024);// reads across two buffers? + BOOST_CHECK_EQUAL(written, 565); + std_buffer = std::string(reinterpret_cast(&buffer), written); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference4), 0, "Difference in content 2nd read"); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } - // set back - r_zstd.seek(1024); - written = r_zstd.read(buffer, 1024); - BOOST_CHECK_EQUAL(written, 569); - std_buffer = std::string(reinterpret_cast(&buffer), written); - BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference2), 0, "Difference in content 2nd read"); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + r_flat.seek(4); + { // C1 + c1.seek(4); + } - // set back - r_zstd.seek(4); + { // C2 + c2.seek(4); + } - written = r_zstd.read(buffer, 1024);// reads across two buffers? - BOOST_CHECK_EQUAL(written, 1024); - std_buffer = std::string(reinterpret_cast(&buffer), written); - BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference3), 0, "Difference in content 2nd read"); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - written = r_zstd.read(buffer, 1024);// reads across two buffers? - BOOST_CHECK_EQUAL(written, 565); + written = r_flat.read((char*) &buffer[0], 4);// reads across two buffers? + BOOST_CHECK_EQUAL(written, 4); std_buffer = std::string(reinterpret_cast(&buffer), written); - BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference4), 0, "Difference in content 2nd read"); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference5), 0, "Difference in content 2nd read"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + { // C1 + written = c1.read(buffer, 4);// reads across two buffers? + BOOST_CHECK_EQUAL(written, 4); + std_buffer = std::string(reinterpret_cast(&buffer), written); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference5), 0, "Difference in content 2nd read"); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } - r_zstd.seek(4); + { // C2 + written = c2.read(buffer, 4);// reads across two buffers? + BOOST_CHECK_EQUAL(written, 4); + std_buffer = std::string(reinterpret_cast(&buffer), written); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference5), 0, "Difference in content 2nd read"); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } - written = r_zstd.read(buffer, 4);// reads across two buffers? - BOOST_CHECK_EQUAL(written, 4); - std_buffer = std::string(reinterpret_cast(&buffer), written); - BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference5), 0, "Difference in content 2nd read"); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - written = r_zstd.read(buffer, 1024);// reads across two buffers? + written = r_flat.read((char*) &buffer[0], 1024);// reads across two buffers? BOOST_CHECK_EQUAL(written, 1024); std_buffer = std::string(reinterpret_cast(&buffer), written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference6), 0, "Difference in content 2nd read"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + + { // C1 + written = c1.read(buffer, 1024);// reads across two buffers? + BOOST_CHECK_EQUAL(written, 1024); + std_buffer = std::string(reinterpret_cast(&buffer), written); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference6), 0, "Difference in content 2nd read"); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } + + { // C2 + written = c2.read(buffer, 1024);// reads across two buffers? + BOOST_CHECK_EQUAL(written, 1024); + std_buffer = std::string(reinterpret_cast(&buffer), written); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference6), 0, "Difference in content 2nd read"); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } } } @@ -684,19 +749,31 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__new_style) { - ////chunked_reader_old r_flat = chunked_reader_old(fastafs_file.c_str()); - - chunked_reader c = chunked_reader(fasta_file.c_str()); - c.fopen(0); // open file handle and load buffer - - written = c.read(buffer, 10); - buffer[written] = '\0'; - - written = c.read(buffer, 100); - buffer[written] = '\0'; - - written = c.read(buffer, 100); - buffer[written] = '\0'; + chunked_reader c1 = chunked_reader(fastafs_file.c_str()); + c1.fopen(0); // open file handle and load buffer + + chunked_reader c2 = chunked_reader(fastafs_file_zstd.c_str()); + c2.fopen(0); // open file handle and load buffer + + { // C1 + written = c1.read(buffer, 1000); + buffer[written] = '\0'; + BOOST_CHECK_EQUAL(written, 1000); + + written = c1.read(buffer, 1000); + buffer[written] = '\0'; + BOOST_CHECK_EQUAL(written, 593); + } + + { // C2 + written = c2.read(buffer, 1000); + buffer[written] = '\0'; + BOOST_CHECK_EQUAL(written, 1000); + + written = c2.read(buffer, 1000); + buffer[written] = '\0'; + BOOST_CHECK_EQUAL(written, 593); + } } } diff --git a/test/fastafs/test_fastafs.cpp b/test/fastafs/test_fastafs.cpp index aded593d..53552e13 100644 --- a/test/fastafs/test_fastafs.cpp +++ b/test/fastafs/test_fastafs.cpp @@ -111,7 +111,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs_seq_fastafile_size_padding_0) char chunk[1]; std::string ref = ">chr1\nttttccccaaaagggg\n"; - + for(uint32_t i = 0; i < ref.size(); i++) { ret = fs.data[0]->view_fasta_chunk(cache_p0->sequences[0], chunk, 1, i, file); BOOST_CHECK_EQUAL(chunk[0], ref[i]); // test for '>' diff --git a/test/fastafs/test_ucsc2bit.cpp b/test/fastafs/test_ucsc2bit.cpp index 1309e2d0..63df271e 100644 --- a/test/fastafs/test_ucsc2bit.cpp +++ b/test/fastafs/test_ucsc2bit.cpp @@ -465,10 +465,8 @@ BOOST_AUTO_TEST_CASE(test_fastafs_view_chunked_2bit_with_offset) uint32_t complen; // voor lengte 1...(245-1) // voor i = 0, 245-lengte - for(complen = 1; complen < reference.size(); complen++) - { - for(uint32_t file_offset = 0; file_offset < reference.size() - complen - 1; file_offset++) - { + for(complen = 1; complen < reference.size(); complen++) { + for(uint32_t file_offset = 0; file_offset < reference.size() - complen - 1; file_offset++) { fs.view_ucsc2bit_chunk(buffer, complen, file_offset); BOOST_CHECK_EQUAL_MESSAGE(reference.compare(file_offset, complen, std_string_nullbyte_safe(buffer, 0, complen), 0, complen), 0, "Failed during len=" << complen << " and file offset=" << file_offset); } diff --git a/test/view/test_view.cpp b/test/view/test_view.cpp index 74bdd94c..280ab90d 100644 --- a/test/view/test_view.cpp +++ b/test/view/test_view.cpp @@ -146,7 +146,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs_twobit_offset_calc) fasta_to_fastafs("test/data/test.fa", fastafs_file, false); fastafs fs = fastafs("test"); fs.load(fastafs_file); - + for(uint32_t i = 0 ; i <= 7; i++) { in_N = fs.data[1]->get_n_offset(i, &num_Ns); BOOST_CHECK_EQUAL(num_Ns, 0);