From 7b19287feff96d8b21b3e6ad2a24180b94a766a7 Mon Sep 17 00:00:00 2001 From: yhoogstrate Date: Sat, 12 Feb 2022 12:13:57 +0100 Subject: [PATCH 01/65] minor OOP improvements to flags classes --- include/flags.hpp | 25 +++++++------------------ src/flags.cpp | 18 +++++++++++++++++- 2 files changed, 24 insertions(+), 19 deletions(-) diff --git a/include/flags.hpp b/include/flags.hpp index d6874d87..8826afb5 100644 --- a/include/flags.hpp +++ b/include/flags.hpp @@ -38,23 +38,21 @@ constexpr std::array bitmasks = { }; -//#include "utils.hpp" - class twobit_flag { +private: + std::array bits; // 00000000 00000000 + protected: twobit_flag(); - std::array bits; // 00000000 00000000 - - // set by flag void set_flag(unsigned char, bool);// counting flag from bit 0(!) bool get_flag(unsigned char); public: void set(char *); - std::array &get_bits(void); // get bit 0 or bit 1 + std::array &get_bits(void); // get bit 0 or bit 1 - needed for exporting flags to file(s) }; @@ -62,10 +60,7 @@ class fastafs_flags : public twobit_flag { public: bool is_complete(); - bool is_incomplete() - { - return !this->is_complete(); - }; + bool is_incomplete(); void set_complete(); void set_incomplete(); @@ -83,16 +78,10 @@ class fastafs_sequence_flags : public twobit_flag bool is_protein(); // alphabet: 'ABCDEFGHIJKLMNOPQRSTUVWYZX*-' bool is_complete(); - bool is_incomplete() - { - return !this->is_complete(); - }; + bool is_incomplete(); // is not complete bool is_circular(); - bool is_linear() - { - return !this->is_circular(); - }; + bool is_linear(); // is not circular bool is_twobit() { diff --git a/src/flags.cpp b/src/flags.cpp index b3e11db1..2e62532b 100644 --- a/src/flags.cpp +++ b/src/flags.cpp @@ -41,10 +41,11 @@ bool twobit_flag::get_flag(unsigned char bit) // https://www.learncpp.com/cpp-tutorial/bit-manipulation-with-bitwise-operators-and-bit-masks/ void twobit_flag::set_flag(unsigned char bit, bool enable) { +#if DEBUG if(bit >= 16) { throw 
std::runtime_error("twobit_flag::set_flag = out of bound: " + std::to_string(bit) + "\n"); } - +#endif //DEBUG if(enable) { // //this->bits[bit / 8] |= bitmasks[bit]; @@ -69,6 +70,11 @@ bool fastafs_flags::is_complete() return this->get_flag(FASTAFS_BITFLAG_COMPLETE); } +bool fastafs_flags::is_incomplete() +{ + return !this->is_complete(); +} + void fastafs_flags::set_complete() { this->set_flag(FASTAFS_BITFLAG_COMPLETE, true); @@ -119,11 +125,21 @@ bool fastafs_sequence_flags::is_complete() return this->get_flag(FASTAFS_SEQUENCE_BITFLAG_COMPLETE); } +bool fastafs_sequence_flags::is_incomplete() +{ + return !this->is_complete(); +} + bool fastafs_sequence_flags::is_circular() { return this->get_flag(FASTAFS_SEQUENCE_BITFLAG_CIRCULAR); } +bool fastafs_sequence_flags::is_linear() +{ + return this->is_circular(); +} + From c66c207203cdd7334c42f3991a9db80c5b399e9c Mon Sep 17 00:00:00 2001 From: yhoogstrate Date: Sat, 12 Feb 2022 12:25:42 +0100 Subject: [PATCH 02/65] minor OOP improvements to flags classes --- include/flags.hpp | 22 ++++++++-------------- src/flags.cpp | 39 ++++++++++++++++++++++++--------------- 2 files changed, 32 insertions(+), 29 deletions(-) diff --git a/include/flags.hpp b/include/flags.hpp index 8826afb5..bc5b361b 100644 --- a/include/flags.hpp +++ b/include/flags.hpp @@ -6,13 +6,13 @@ #include -const unsigned char FASTAFS_BITFLAG_COMPLETE = 0; +const static unsigned char FASTAFS_BITFLAG_COMPLETE = 0; -const unsigned char FASTAFS_SEQUENCE_BITFLAG_SEQUENCE_TYPE_1 = 0; -const unsigned char FASTAFS_SEQUENCE_BITFLAG_SEQUENCE_TYPE_2 = 1; -// const unsigned char FASTAFS_SEQUENCE_BITFLAG_???? = 2 ; // is reserved -const unsigned char FASTAFS_SEQUENCE_BITFLAG_COMPLETE = 3; -const unsigned char FASTAFS_SEQUENCE_BITFLAG_CIRCULAR = 4; +const static unsigned char FASTAFS_SEQUENCE_BITFLAG_SEQUENCE_TYPE_1 = 0; +const static unsigned char FASTAFS_SEQUENCE_BITFLAG_SEQUENCE_TYPE_2 = 1; +// const static unsigned char FASTAFS_SEQUENCE_BITFLAG_???? 
= 2 ; // is reserved +const static unsigned char FASTAFS_SEQUENCE_BITFLAG_COMPLETE = 3; +const static unsigned char FASTAFS_SEQUENCE_BITFLAG_CIRCULAR = 4; @@ -83,14 +83,8 @@ class fastafs_sequence_flags : public twobit_flag bool is_circular(); bool is_linear(); // is not circular - bool is_twobit() - { - return (this->is_dna() | this->is_rna()); - }; - bool is_fourbit() - { - return this->is_iupec_nucleotide(); - }; + bool is_twobit(); + bool is_fourbit(); // set by entity diff --git a/src/flags.cpp b/src/flags.cpp index 2e62532b..900d915f 100644 --- a/src/flags.cpp +++ b/src/flags.cpp @@ -37,8 +37,12 @@ bool twobit_flag::get_flag(unsigned char bit) } - -// https://www.learncpp.com/cpp-tutorial/bit-manipulation-with-bitwise-operators-and-bit-masks/ +/** + * @param bit denotes the i'th of 16 bits to set value of + * @param enable whether to enable of disable the bit + * + * more info: https://www.learncpp.com/cpp-tutorial/bit-manipulation-with-bitwise-operators-and-bit-masks/ + */ void twobit_flag::set_flag(unsigned char bit, bool enable) { #if DEBUG @@ -91,32 +95,28 @@ void fastafs_flags::set_incomplete() // alphabet: 'ACTG' + 'N' bool fastafs_sequence_flags::is_dna() { - return ( - this->get_flag(FASTAFS_SEQUENCE_BITFLAG_SEQUENCE_TYPE_1) == false && - this->get_flag(FASTAFS_SEQUENCE_BITFLAG_SEQUENCE_TYPE_2) == false); + return (this->get_flag(FASTAFS_SEQUENCE_BITFLAG_SEQUENCE_TYPE_1) == false && + this->get_flag(FASTAFS_SEQUENCE_BITFLAG_SEQUENCE_TYPE_2) == false); } // alphabet: 'ACUG' + 'N' bool fastafs_sequence_flags::is_rna() { - return ( - this->get_flag(FASTAFS_SEQUENCE_BITFLAG_SEQUENCE_TYPE_1) == true && - this->get_flag(FASTAFS_SEQUENCE_BITFLAG_SEQUENCE_TYPE_2) == false); + return (this->get_flag(FASTAFS_SEQUENCE_BITFLAG_SEQUENCE_TYPE_1) == true && + this->get_flag(FASTAFS_SEQUENCE_BITFLAG_SEQUENCE_TYPE_2) == false); } // alphabet: 'ACGTURYKMSWBDHVN' + '-' bool fastafs_sequence_flags::is_iupec_nucleotide() { - return ( - 
this->get_flag(FASTAFS_SEQUENCE_BITFLAG_SEQUENCE_TYPE_1) == false && - this->get_flag(FASTAFS_SEQUENCE_BITFLAG_SEQUENCE_TYPE_2) == true); + return (this->get_flag(FASTAFS_SEQUENCE_BITFLAG_SEQUENCE_TYPE_1) == false && + this->get_flag(FASTAFS_SEQUENCE_BITFLAG_SEQUENCE_TYPE_2) == true); } bool fastafs_sequence_flags::is_protein() { - return ( - this->get_flag(FASTAFS_SEQUENCE_BITFLAG_SEQUENCE_TYPE_1) == true && - this->get_flag(FASTAFS_SEQUENCE_BITFLAG_SEQUENCE_TYPE_2) == true); + return (this->get_flag(FASTAFS_SEQUENCE_BITFLAG_SEQUENCE_TYPE_1) == true && + this->get_flag(FASTAFS_SEQUENCE_BITFLAG_SEQUENCE_TYPE_2) == true); } @@ -137,9 +137,18 @@ bool fastafs_sequence_flags::is_circular() bool fastafs_sequence_flags::is_linear() { - return this->is_circular(); + return !this->is_circular(); } +bool fastafs_sequence_flags::is_twobit() +{ + return (this->is_dna() | this->is_rna()); +} + +bool fastafs_sequence_flags::is_fourbit() +{ + return this->is_iupec_nucleotide(); +} From b353edfc61fbac37e6ea9b40c4d3afaafba76c73 Mon Sep 17 00:00:00 2001 From: youri Date: Sun, 13 Feb 2022 12:07:23 +0100 Subject: [PATCH 03/65] kdev4 --- fastafs.kdev4 | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 fastafs.kdev4 diff --git a/fastafs.kdev4 b/fastafs.kdev4 new file mode 100644 index 00000000..b0cc0f08 --- /dev/null +++ b/fastafs.kdev4 @@ -0,0 +1,4 @@ +[Project] +CreatedFrom=CMakeLists.txt +Manager=KDevCMakeManager +Name=fastafs From 06c378b3886c5b7b9c545976d614e4a5441133c1 Mon Sep 17 00:00:00 2001 From: youri Date: Tue, 15 Feb 2022 17:40:55 +0100 Subject: [PATCH 04/65] update oop structure --- .gitignore | 1 + include/sequence_region.hpp | 21 ++++-- src/fastafs.cpp | 14 ++-- src/sequence_region.cpp | 19 +++-- test/sequenceregion/test_sequenceregion.cpp | 78 ++++++++++----------- 5 files changed, 73 insertions(+), 60 deletions(-) diff --git a/.gitignore b/.gitignore index 6ac08067..a4347fa2 100644 --- a/.gitignore +++ b/.gitignore @@ -34,3 +34,4 @@ perf.* analysis.txt *.naf 
.kdev4 +compile_commands.json diff --git a/include/sequence_region.hpp b/include/sequence_region.hpp index cb86ed87..7e9b2cda 100644 --- a/include/sequence_region.hpp +++ b/include/sequence_region.hpp @@ -21,19 +21,26 @@ class sequence_region { -public: - sequence_region(char *); - sequence_region(const char *); - std::string seq_name; +private: - bool has_defined_end; + bool defined_end;//whether the requested region has a defined end position (chr1:1-) has; (chr1:1-2) has not off_t start; off_t end; -private: - void parse(const char *); + void parse(const char *); + + +public: + sequence_region(char *); + sequence_region(const char *); + + std::string seq_name; + //const std::string& get_seq_name() {return seq_name; }; + bool has_defined_end(void) const {return defined_end; }; + off_t get_start_position(void) const {return start; }; + off_t get_end_position(void) const {return end; }; }; diff --git a/src/fastafs.cpp b/src/fastafs.cpp index d3dd7425..36cab91e 100644 --- a/src/fastafs.cpp +++ b/src/fastafs.cpp @@ -363,13 +363,13 @@ size_t fastafs_seq::view_sequence_region_size(ffs2f_init_seq* cache, sequence_re size_t total_requested_size; - if(sr->has_defined_end) { - total_requested_size = std::min((size_t) this->n, (size_t) sr->end + 1); + if(sr->has_defined_end()) { + total_requested_size = std::min((size_t) this->n, (size_t) sr->get_end_position() + 1); } else { total_requested_size = this->n; } - total_requested_size -= sr->start; + total_requested_size -= sr->get_start_position(); return total_requested_size; } @@ -395,13 +395,13 @@ uint32_t fastafs_seq::view_sequence_region(ffs2f_init_seq* cache, uint32_t written = 0; size_t total_requested_size; - if(sr->has_defined_end) { - total_requested_size = std::min((size_t) this->n, (size_t) sr->end + 1); + if(sr->has_defined_end()) { + total_requested_size = std::min((size_t) this->n, (size_t) sr->get_end_position() + 1); } else { total_requested_size = this->n; } - total_requested_size -= sr->start; + 
total_requested_size -= sr->get_start_position(); total_requested_size -= offset; total_requested_size = std::min(size, total_requested_size); @@ -409,7 +409,7 @@ uint32_t fastafs_seq::view_sequence_region(ffs2f_init_seq* cache, cache, // ffs2f_init_seq* cache, buffer, // char *buffer (size_t) total_requested_size, // size_t buffer_size, - (off_t) 2 + this->name.size() + sr->start + offset, // offset is for chunked reading + (off_t) 2 + this->name.size() + sr->get_start_position() + offset, // offset is for chunked reading fh ); diff --git a/src/sequence_region.cpp b/src/sequence_region.cpp index 4f7a2015..0faf16a2 100644 --- a/src/sequence_region.cpp +++ b/src/sequence_region.cpp @@ -4,7 +4,7 @@ sequence_region::sequence_region(char * seqstr) : - seq_name(""), has_defined_end(false), start(0), end(0) + seq_name(""), defined_end(false), start(0), end(0) { parse((const char *) seqstr);// char* can be converted to cost char*, but not vice versa @@ -12,7 +12,7 @@ sequence_region::sequence_region(char * seqstr) : } sequence_region::sequence_region(const char * seqstr) : - seq_name(""), has_defined_end(false), start(0), end(0) + seq_name(""), defined_end(false), start(0), end(0) { parse(seqstr); @@ -63,7 +63,7 @@ void sequence_region::parse(const char * seqstr) this->start = std::stoi(start); - this->has_defined_end = true; + this->defined_end = true; this->end = this->start; } else if(p2 == (p + 1)) {// chrA:-123 std::string end = std::string(seqstr, p2 + 1, strlen(seqstr) - p2 - 1); @@ -71,13 +71,13 @@ void sequence_region::parse(const char * seqstr) this->start = 0; this->end = std::stoi(end) ; - this->has_defined_end = true; + this->defined_end = true; } else if(p2 > (p + 1)) { // chrA:123- | chrA:123-456 | chrA:123-456ERR if(p2 + 1 == strlen(seqstr)) { // chrA:123- std::string start = std::string(seqstr, p + 1, p2 - p - 1); this->start = std::stoi(start); - this->has_defined_end = false; + this->defined_end = false; } else { // chrA:123-456 | chrA:123-456ERR 
std::string start = std::string(seqstr, p + 1, p2 - p - 1); std::string end = std::string(seqstr, p2 + 1, strlen(seqstr) - p2 - 1); @@ -85,14 +85,19 @@ void sequence_region::parse(const char * seqstr) this->start = std::stoi(start) ; - this->has_defined_end = true; + this->defined_end = true; this->end = std::stoi(end) ; } } } - if(this->has_defined_end and this->start > this->end) { +#if DEBUG + if(this->has_defined_end() and this->get_start_position() > this->get_end_position()) { throw std::invalid_argument("Invalid region - start larger than end."); } +#endif //DEBUG } + + + diff --git a/test/sequenceregion/test_sequenceregion.cpp b/test/sequenceregion/test_sequenceregion.cpp index 436338a1..28a0fab1 100644 --- a/test/sequenceregion/test_sequenceregion.cpp +++ b/test/sequenceregion/test_sequenceregion.cpp @@ -33,7 +33,7 @@ BOOST_AUTO_TEST_CASE(test_sequence_region) sequence_region sr = sequence_region(&(arg[5])); BOOST_CHECK_EQUAL(sr.seq_name, "chr1"); - BOOST_CHECK_EQUAL(sr.has_defined_end, false); // not defined; sequence's end + BOOST_CHECK_EQUAL(sr.has_defined_end(), false); // not defined; sequence's end } { @@ -41,7 +41,7 @@ BOOST_AUTO_TEST_CASE(test_sequence_region) sequence_region sr = sequence_region(&(arg[5])); BOOST_CHECK_EQUAL(sr.seq_name, "chr1"); - BOOST_CHECK_EQUAL(sr.has_defined_end, false); // not defined; sequence's end + BOOST_CHECK_EQUAL(sr.has_defined_end(), false); // not defined; sequence's end } { @@ -49,10 +49,10 @@ BOOST_AUTO_TEST_CASE(test_sequence_region) sequence_region sr = sequence_region(&(arg[5])); BOOST_CHECK_EQUAL(sr.seq_name, "chr1"); - BOOST_CHECK_EQUAL(sr.start, 123); + BOOST_CHECK_EQUAL(sr.get_start_position(), 123); - BOOST_CHECK_EQUAL(sr.has_defined_end, true); - BOOST_CHECK_EQUAL(sr.end, 123); + BOOST_CHECK_EQUAL(sr.has_defined_end(), true); + BOOST_CHECK_EQUAL(sr.get_end_position() , 123); } { @@ -60,10 +60,10 @@ BOOST_AUTO_TEST_CASE(test_sequence_region) sequence_region sr = sequence_region(&(arg[5])); 
BOOST_CHECK_EQUAL(sr.seq_name, "chr1"); - BOOST_CHECK_EQUAL(sr.start, 0); + BOOST_CHECK_EQUAL(sr.get_start_position(), 0); - BOOST_CHECK_EQUAL(sr.has_defined_end, true); - BOOST_CHECK_EQUAL(sr.end, 123); + BOOST_CHECK_EQUAL(sr.has_defined_end(), true); + BOOST_CHECK_EQUAL(sr.get_end_position() , 123); } { @@ -71,10 +71,10 @@ BOOST_AUTO_TEST_CASE(test_sequence_region) sequence_region sr = sequence_region(&(arg[5])); BOOST_CHECK_EQUAL(sr.seq_name, "chr1"); - BOOST_CHECK_EQUAL(sr.start, 123); + BOOST_CHECK_EQUAL(sr.get_start_position(), 123); - BOOST_CHECK_EQUAL(sr.has_defined_end, true); - BOOST_CHECK_EQUAL(sr.end, 456); + BOOST_CHECK_EQUAL(sr.has_defined_end(), true); + BOOST_CHECK_EQUAL(sr.get_end_position(), 456); } @@ -83,9 +83,9 @@ BOOST_AUTO_TEST_CASE(test_sequence_region) sequence_region sr = sequence_region(&(arg[5])); BOOST_CHECK_EQUAL(sr.seq_name, "chr1"); - BOOST_CHECK_EQUAL(sr.start, 123); + BOOST_CHECK_EQUAL(sr.get_start_position(), 123); - BOOST_CHECK_EQUAL(sr.has_defined_end, false); + BOOST_CHECK_EQUAL(sr.has_defined_end(), false); //BOOST_CHECK_EQUAL(sr.end , 456); - underfined } @@ -109,7 +109,7 @@ BOOST_AUTO_TEST_CASE(test_sequence_region3) sequence_region sr = sequence_region(&(arg[5])); BOOST_CHECK_EQUAL(sr.seq_name, "chrRr1"); - BOOST_CHECK_EQUAL(sr.has_defined_end, false); // not defined; sequence's end + BOOST_CHECK_EQUAL(sr.has_defined_end(), false); // not defined; sequence's end } { @@ -117,7 +117,7 @@ BOOST_AUTO_TEST_CASE(test_sequence_region3) sequence_region sr = sequence_region(&(arg[5])); BOOST_CHECK_EQUAL(sr.seq_name, "chrRr1"); - BOOST_CHECK_EQUAL(sr.has_defined_end, false); // not defined; sequence's end + BOOST_CHECK_EQUAL(sr.has_defined_end(), false); // not defined; sequence's end } { @@ -125,10 +125,10 @@ BOOST_AUTO_TEST_CASE(test_sequence_region3) sequence_region sr = sequence_region(&(arg[5])); BOOST_CHECK_EQUAL(sr.seq_name, "chrRr1"); - BOOST_CHECK_EQUAL(sr.start, 1234); + BOOST_CHECK_EQUAL(sr.get_start_position(), 1234); - 
BOOST_CHECK_EQUAL(sr.has_defined_end, true); - BOOST_CHECK_EQUAL(sr.end, 1234); + BOOST_CHECK_EQUAL(sr.has_defined_end(), true); + BOOST_CHECK_EQUAL(sr.get_end_position() , 1234); } { @@ -136,10 +136,10 @@ BOOST_AUTO_TEST_CASE(test_sequence_region3) sequence_region sr = sequence_region(&(arg[5])); BOOST_CHECK_EQUAL(sr.seq_name, "chrRr1"); - BOOST_CHECK_EQUAL(sr.start, 0); + BOOST_CHECK_EQUAL(sr.get_start_position(), 0); - BOOST_CHECK_EQUAL(sr.has_defined_end, true); - BOOST_CHECK_EQUAL(sr.end, 1234); + BOOST_CHECK_EQUAL(sr.has_defined_end(), true); + BOOST_CHECK_EQUAL(sr.get_end_position() , 1234); } { @@ -147,10 +147,10 @@ BOOST_AUTO_TEST_CASE(test_sequence_region3) sequence_region sr = sequence_region(&(arg[5])); BOOST_CHECK_EQUAL(sr.seq_name, "chrRr1"); - BOOST_CHECK_EQUAL(sr.start, 1234); + BOOST_CHECK_EQUAL(sr.get_start_position(), 1234); - BOOST_CHECK_EQUAL(sr.has_defined_end, true); - BOOST_CHECK_EQUAL(sr.end, 1235); + BOOST_CHECK_EQUAL(sr.has_defined_end(), true); + BOOST_CHECK_EQUAL(sr.get_end_position() , 1235); } @@ -159,9 +159,9 @@ BOOST_AUTO_TEST_CASE(test_sequence_region3) sequence_region sr = sequence_region(&(arg[5])); BOOST_CHECK_EQUAL(sr.seq_name, "chrRr1"); - BOOST_CHECK_EQUAL(sr.start, 1234); + BOOST_CHECK_EQUAL(sr.get_start_position(), 1234); - BOOST_CHECK_EQUAL(sr.has_defined_end, false); + BOOST_CHECK_EQUAL(sr.has_defined_end(), false); //BOOST_CHECK_EQUAL(sr.end , 1235); - underfined } @@ -187,7 +187,7 @@ BOOST_AUTO_TEST_CASE(test_sequence_region2) sequence_region sr = sequence_region(&(arg[5])); BOOST_CHECK_EQUAL(sr.seq_name, "chrRr1"); - BOOST_CHECK_EQUAL(sr.has_defined_end, false); // not defined; sequence's end + BOOST_CHECK_EQUAL(sr.has_defined_end(), false); // not defined; sequence's end } { @@ -195,7 +195,7 @@ BOOST_AUTO_TEST_CASE(test_sequence_region2) sequence_region sr = sequence_region(&(arg[5])); BOOST_CHECK_EQUAL(sr.seq_name, "chrRr1"); - BOOST_CHECK_EQUAL(sr.has_defined_end, false); // not defined; sequence's end + 
BOOST_CHECK_EQUAL(sr.has_defined_end(), false); // not defined; sequence's end } { @@ -203,10 +203,10 @@ BOOST_AUTO_TEST_CASE(test_sequence_region2) sequence_region sr = sequence_region(&(arg[5])); BOOST_CHECK_EQUAL(sr.seq_name, "chrRr1"); - BOOST_CHECK_EQUAL(sr.start, 123); + BOOST_CHECK_EQUAL(sr.get_start_position(), 123); - BOOST_CHECK_EQUAL(sr.has_defined_end, true); - BOOST_CHECK_EQUAL(sr.end, 123); + BOOST_CHECK_EQUAL(sr.has_defined_end(), true); + BOOST_CHECK_EQUAL(sr.get_end_position(), 123); } { @@ -214,10 +214,10 @@ BOOST_AUTO_TEST_CASE(test_sequence_region2) sequence_region sr = sequence_region(&(arg[5])); BOOST_CHECK_EQUAL(sr.seq_name, "chrRr1"); - BOOST_CHECK_EQUAL(sr.start, 0); + BOOST_CHECK_EQUAL(sr.get_start_position(), 0); - BOOST_CHECK_EQUAL(sr.has_defined_end, true); - BOOST_CHECK_EQUAL(sr.end, 123); + BOOST_CHECK_EQUAL(sr.has_defined_end(), true); + BOOST_CHECK_EQUAL(sr.get_end_position(), 123); } { @@ -225,10 +225,10 @@ BOOST_AUTO_TEST_CASE(test_sequence_region2) sequence_region sr = sequence_region(&(arg[5])); BOOST_CHECK_EQUAL(sr.seq_name, "chrRr1"); - BOOST_CHECK_EQUAL(sr.start, 123); + BOOST_CHECK_EQUAL(sr.get_start_position(), 123); - BOOST_CHECK_EQUAL(sr.has_defined_end, true); - BOOST_CHECK_EQUAL(sr.end, 456); + BOOST_CHECK_EQUAL(sr.has_defined_end(), true); + BOOST_CHECK_EQUAL(sr.get_end_position(), 456); } @@ -237,9 +237,9 @@ BOOST_AUTO_TEST_CASE(test_sequence_region2) sequence_region sr = sequence_region(&(arg[5])); BOOST_CHECK_EQUAL(sr.seq_name, "chrRr1"); - BOOST_CHECK_EQUAL(sr.start, 123); + BOOST_CHECK_EQUAL(sr.get_start_position(), 123); - BOOST_CHECK_EQUAL(sr.has_defined_end, false); + BOOST_CHECK_EQUAL(sr.has_defined_end(), false); //BOOST_CHECK_EQUAL(sr.end , 456); - underfined } From a8a1a7f1612aa55d29989a73eb1141bd28a8263f Mon Sep 17 00:00:00 2001 From: youri Date: Tue, 15 Feb 2022 18:04:49 +0100 Subject: [PATCH 05/65] whitespaces --- include/sequence_region.hpp | 1 - src/sequence_region.cpp | 6 +----- 2 files changed, 1 
insertion(+), 6 deletions(-) diff --git a/include/sequence_region.hpp b/include/sequence_region.hpp index 7e9b2cda..96412597 100644 --- a/include/sequence_region.hpp +++ b/include/sequence_region.hpp @@ -21,7 +21,6 @@ class sequence_region { - private: bool defined_end;//whether the requested region has a defined end position (chr1:1-) has; (chr1:1-2) has not diff --git a/src/sequence_region.cpp b/src/sequence_region.cpp index 0faf16a2..0632a594 100644 --- a/src/sequence_region.cpp +++ b/src/sequence_region.cpp @@ -2,21 +2,17 @@ #include "sequence_region.hpp" - sequence_region::sequence_region(char * seqstr) : seq_name(""), defined_end(false), start(0), end(0) { - parse((const char *) seqstr);// char* can be converted to cost char*, but not vice versa - } + sequence_region::sequence_region(const char * seqstr) : seq_name(""), defined_end(false), start(0), end(0) { - parse(seqstr); - } From 36a1eb7b146c70b6830de6e7a6cd6a42ab6086fc Mon Sep 17 00:00:00 2001 From: youri Date: Tue, 15 Feb 2022 18:56:59 +0100 Subject: [PATCH 06/65] more oop restructuring --- include/sequence_region.hpp | 12 ++++--- src/fastafs.cpp | 4 +-- src/sequence_region.cpp | 5 +-- test/sequenceregion/test_sequenceregion.cpp | 36 ++++++++++----------- 4 files changed, 31 insertions(+), 26 deletions(-) diff --git a/include/sequence_region.hpp b/include/sequence_region.hpp index 96412597..b0908b79 100644 --- a/include/sequence_region.hpp +++ b/include/sequence_region.hpp @@ -19,6 +19,7 @@ + class sequence_region { private: @@ -28,18 +29,21 @@ class sequence_region off_t start; off_t end; + std::string seq_name; + + void parse(const char *); public: sequence_region(char *); - sequence_region(const char *); + sequence_region(const char * seqstr ); - std::string seq_name; - //const std::string& get_seq_name() {return seq_name; }; - bool has_defined_end(void) const {return defined_end; }; + + std::string get_seq_name() {return seq_name; }; off_t get_start_position(void) const {return start; }; off_t 
get_end_position(void) const {return end; }; + bool has_defined_end(void) const {return defined_end; }; }; diff --git a/src/fastafs.cpp b/src/fastafs.cpp index 36cab91e..e314a2d1 100644 --- a/src/fastafs.cpp +++ b/src/fastafs.cpp @@ -850,7 +850,7 @@ size_t fastafs::view_sequence_region_size(ffs2f_init* cache, const char *seq_reg // 02 : check if 'chr' is equals this->data[i].name for(size_t i = 0; i < this->data.size(); i++) { - if(sr.seq_name.compare(this->data[i]->name) == 0) { + if(sr.get_seq_name().compare(this->data[i]->name) == 0) { return this->data[i]->view_sequence_region_size(cache->sequences[i], &sr, &file); } } @@ -885,7 +885,7 @@ uint32_t fastafs::view_sequence_region(ffs2f_init* cache, const char *seq_region // 02 : check if 'chr' is equals this->data[i].name for(size_t i = 0; i < this->data.size(); i++) { - if(sr.seq_name.compare(this->data[i]->name) == 0) { + if(sr.get_seq_name().compare(this->data[i]->name) == 0) { return this->data[i]->view_sequence_region(cache->sequences[i], &sr, buffer, buffer_size, file_offset, fh); } } diff --git a/src/sequence_region.cpp b/src/sequence_region.cpp index 0632a594..6c3b9aeb 100644 --- a/src/sequence_region.cpp +++ b/src/sequence_region.cpp @@ -3,14 +3,14 @@ sequence_region::sequence_region(char * seqstr) : - seq_name(""), defined_end(false), start(0), end(0) + defined_end(false), start(0), end(0), seq_name("") { parse((const char *) seqstr);// char* can be converted to cost char*, but not vice versa } sequence_region::sequence_region(const char * seqstr) : - seq_name(""), defined_end(false), start(0), end(0) + defined_end(false), start(0), end(0), seq_name("") { parse(seqstr); } @@ -97,3 +97,4 @@ void sequence_region::parse(const char * seqstr) + diff --git a/test/sequenceregion/test_sequenceregion.cpp b/test/sequenceregion/test_sequenceregion.cpp index 28a0fab1..9ef05577 100644 --- a/test/sequenceregion/test_sequenceregion.cpp +++ b/test/sequenceregion/test_sequenceregion.cpp @@ -32,7 +32,7 @@ 
BOOST_AUTO_TEST_CASE(test_sequence_region) char arg[] = "/seq/chr1"; sequence_region sr = sequence_region(&(arg[5])); - BOOST_CHECK_EQUAL(sr.seq_name, "chr1"); + BOOST_CHECK_EQUAL(sr.get_seq_name(), "chr1"); BOOST_CHECK_EQUAL(sr.has_defined_end(), false); // not defined; sequence's end } @@ -40,7 +40,7 @@ BOOST_AUTO_TEST_CASE(test_sequence_region) char arg[] = "/seq/chr1:"; sequence_region sr = sequence_region(&(arg[5])); - BOOST_CHECK_EQUAL(sr.seq_name, "chr1"); + BOOST_CHECK_EQUAL(sr.get_seq_name(), "chr1"); BOOST_CHECK_EQUAL(sr.has_defined_end(), false); // not defined; sequence's end } @@ -48,7 +48,7 @@ BOOST_AUTO_TEST_CASE(test_sequence_region) char arg[] = "/seq/chr1:123"; sequence_region sr = sequence_region(&(arg[5])); - BOOST_CHECK_EQUAL(sr.seq_name, "chr1"); + BOOST_CHECK_EQUAL(sr.get_seq_name(), "chr1"); BOOST_CHECK_EQUAL(sr.get_start_position(), 123); BOOST_CHECK_EQUAL(sr.has_defined_end(), true); @@ -59,7 +59,7 @@ BOOST_AUTO_TEST_CASE(test_sequence_region) char arg[] = "/seq/chr1:-123"; sequence_region sr = sequence_region(&(arg[5])); - BOOST_CHECK_EQUAL(sr.seq_name, "chr1"); + BOOST_CHECK_EQUAL(sr.get_seq_name(), "chr1"); BOOST_CHECK_EQUAL(sr.get_start_position(), 0); BOOST_CHECK_EQUAL(sr.has_defined_end(), true); @@ -70,7 +70,7 @@ BOOST_AUTO_TEST_CASE(test_sequence_region) char arg[] = "/seq/chr1:123-456"; sequence_region sr = sequence_region(&(arg[5])); - BOOST_CHECK_EQUAL(sr.seq_name, "chr1"); + BOOST_CHECK_EQUAL(sr.get_seq_name(), "chr1"); BOOST_CHECK_EQUAL(sr.get_start_position(), 123); BOOST_CHECK_EQUAL(sr.has_defined_end(), true); @@ -82,7 +82,7 @@ BOOST_AUTO_TEST_CASE(test_sequence_region) char arg[] = "/seq/chr1:123-"; sequence_region sr = sequence_region(&(arg[5])); - BOOST_CHECK_EQUAL(sr.seq_name, "chr1"); + BOOST_CHECK_EQUAL(sr.get_seq_name(), "chr1"); BOOST_CHECK_EQUAL(sr.get_start_position(), 123); BOOST_CHECK_EQUAL(sr.has_defined_end(), false); @@ -108,7 +108,7 @@ BOOST_AUTO_TEST_CASE(test_sequence_region3) char arg[] = "/seq/chrRr1"; 
sequence_region sr = sequence_region(&(arg[5])); - BOOST_CHECK_EQUAL(sr.seq_name, "chrRr1"); + BOOST_CHECK_EQUAL(sr.get_seq_name(), "chrRr1"); BOOST_CHECK_EQUAL(sr.has_defined_end(), false); // not defined; sequence's end } @@ -116,7 +116,7 @@ BOOST_AUTO_TEST_CASE(test_sequence_region3) char arg[] = "/seq/chrRr1:"; sequence_region sr = sequence_region(&(arg[5])); - BOOST_CHECK_EQUAL(sr.seq_name, "chrRr1"); + BOOST_CHECK_EQUAL(sr.get_seq_name(), "chrRr1"); BOOST_CHECK_EQUAL(sr.has_defined_end(), false); // not defined; sequence's end } @@ -124,7 +124,7 @@ BOOST_AUTO_TEST_CASE(test_sequence_region3) char arg[] = "/seq/chrRr1:1234"; sequence_region sr = sequence_region(&(arg[5])); - BOOST_CHECK_EQUAL(sr.seq_name, "chrRr1"); + BOOST_CHECK_EQUAL(sr.get_seq_name(), "chrRr1"); BOOST_CHECK_EQUAL(sr.get_start_position(), 1234); BOOST_CHECK_EQUAL(sr.has_defined_end(), true); @@ -135,7 +135,7 @@ BOOST_AUTO_TEST_CASE(test_sequence_region3) char arg[] = "/seq/chrRr1:-1234"; sequence_region sr = sequence_region(&(arg[5])); - BOOST_CHECK_EQUAL(sr.seq_name, "chrRr1"); + BOOST_CHECK_EQUAL(sr.get_seq_name(), "chrRr1"); BOOST_CHECK_EQUAL(sr.get_start_position(), 0); BOOST_CHECK_EQUAL(sr.has_defined_end(), true); @@ -146,7 +146,7 @@ BOOST_AUTO_TEST_CASE(test_sequence_region3) char arg[] = "/seq/chrRr1:1234-1235"; sequence_region sr = sequence_region(&(arg[5])); - BOOST_CHECK_EQUAL(sr.seq_name, "chrRr1"); + BOOST_CHECK_EQUAL(sr.get_seq_name(), "chrRr1"); BOOST_CHECK_EQUAL(sr.get_start_position(), 1234); BOOST_CHECK_EQUAL(sr.has_defined_end(), true); @@ -158,7 +158,7 @@ BOOST_AUTO_TEST_CASE(test_sequence_region3) char arg[] = "/seq/chrRr1:1234-"; sequence_region sr = sequence_region(&(arg[5])); - BOOST_CHECK_EQUAL(sr.seq_name, "chrRr1"); + BOOST_CHECK_EQUAL(sr.get_seq_name(), "chrRr1"); BOOST_CHECK_EQUAL(sr.get_start_position(), 1234); BOOST_CHECK_EQUAL(sr.has_defined_end(), false); @@ -186,7 +186,7 @@ BOOST_AUTO_TEST_CASE(test_sequence_region2) char arg[] = "/seq/chrRr1"; 
sequence_region sr = sequence_region(&(arg[5])); - BOOST_CHECK_EQUAL(sr.seq_name, "chrRr1"); + BOOST_CHECK_EQUAL(sr.get_seq_name(), "chrRr1"); BOOST_CHECK_EQUAL(sr.has_defined_end(), false); // not defined; sequence's end } @@ -194,7 +194,7 @@ BOOST_AUTO_TEST_CASE(test_sequence_region2) char arg[] = "/seq/chrRr1:"; sequence_region sr = sequence_region(&(arg[5])); - BOOST_CHECK_EQUAL(sr.seq_name, "chrRr1"); + BOOST_CHECK_EQUAL(sr.get_seq_name(), "chrRr1"); BOOST_CHECK_EQUAL(sr.has_defined_end(), false); // not defined; sequence's end } @@ -202,7 +202,7 @@ BOOST_AUTO_TEST_CASE(test_sequence_region2) char arg[] = "/seq/chrRr1:123"; sequence_region sr = sequence_region(&(arg[5])); - BOOST_CHECK_EQUAL(sr.seq_name, "chrRr1"); + BOOST_CHECK_EQUAL(sr.get_seq_name(), "chrRr1"); BOOST_CHECK_EQUAL(sr.get_start_position(), 123); BOOST_CHECK_EQUAL(sr.has_defined_end(), true); @@ -213,7 +213,7 @@ BOOST_AUTO_TEST_CASE(test_sequence_region2) char arg[] = "/seq/chrRr1:-123"; sequence_region sr = sequence_region(&(arg[5])); - BOOST_CHECK_EQUAL(sr.seq_name, "chrRr1"); + BOOST_CHECK_EQUAL(sr.get_seq_name(), "chrRr1"); BOOST_CHECK_EQUAL(sr.get_start_position(), 0); BOOST_CHECK_EQUAL(sr.has_defined_end(), true); @@ -224,7 +224,7 @@ BOOST_AUTO_TEST_CASE(test_sequence_region2) char arg[] = "/seq/chrRr1:123-456"; sequence_region sr = sequence_region(&(arg[5])); - BOOST_CHECK_EQUAL(sr.seq_name, "chrRr1"); + BOOST_CHECK_EQUAL(sr.get_seq_name(), "chrRr1"); BOOST_CHECK_EQUAL(sr.get_start_position(), 123); BOOST_CHECK_EQUAL(sr.has_defined_end(), true); @@ -236,7 +236,7 @@ BOOST_AUTO_TEST_CASE(test_sequence_region2) char arg[] = "/seq/chrRr1:123-"; sequence_region sr = sequence_region(&(arg[5])); - BOOST_CHECK_EQUAL(sr.seq_name, "chrRr1"); + BOOST_CHECK_EQUAL(sr.get_seq_name(), "chrRr1"); BOOST_CHECK_EQUAL(sr.get_start_position(), 123); BOOST_CHECK_EQUAL(sr.has_defined_end(), false); From f2d833d3e5bbffbde487d721606f09f4efdbd49e Mon Sep 17 00:00:00 2001 From: youri Date: Tue, 15 Feb 2022 
20:33:18 +0100 Subject: [PATCH 07/65] oop updates --- include/sequence_region.hpp | 4 ++-- src/database.cpp | 9 ++++++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/include/sequence_region.hpp b/include/sequence_region.hpp index b0908b79..e59c1c58 100644 --- a/include/sequence_region.hpp +++ b/include/sequence_region.hpp @@ -20,10 +20,10 @@ + class sequence_region { private: - bool defined_end;//whether the requested region has a defined end position (chr1:1-) has; (chr1:1-2) has not off_t start; @@ -32,7 +32,7 @@ class sequence_region std::string seq_name; - void parse(const char *); + void parse(const char *); public: diff --git a/src/database.cpp b/src/database.cpp index fd7028f4..1a57d150 100644 --- a/src/database.cpp +++ b/src/database.cpp @@ -116,9 +116,16 @@ void database::list() } + + // @todo return a filestream to a particular file one day? std::string database::add(char *name) { + if(this->get(std::string(name)) != "") + { + throw std::runtime_error("Trying to add duplicate entry to database."); + } + std::ofstream outputFile; outputFile.open(this->idx, std::fstream::app); @@ -135,7 +142,7 @@ std::string database::add(char *name) */ std::string database::get(std::string fastafs_name_or_id) { - std::string fname; + std::string fname = ""; std::ifstream infile(this->idx); std::string line; From d36487e4cc5e28221fae3464919af1220de15cdf Mon Sep 17 00:00:00 2001 From: youri Date: Tue, 15 Feb 2022 20:58:57 +0100 Subject: [PATCH 08/65] more portable version of database controller, should crash on duplicate entries --- CMakeLists.txt | 5 ++++ include/database.hpp | 8 ++++--- src/database.cpp | 20 +++++++++++++--- src/fuse.cpp | 2 +- src/main.cpp | 10 ++++---- test/cache/test_cache.cpp | 2 +- test/database/test_database.cpp | 24 +++++++++++++++++++ .../test_fivebit_fivebytes.cpp | 2 +- test/utils/test_utils.cpp | 2 +- 9 files changed, 60 insertions(+), 15 deletions(-) create mode 100644 test/database/test_database.cpp diff --git 
a/CMakeLists.txt b/CMakeLists.txt index 5cfeee9a..56ab9fce 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -201,6 +201,11 @@ add_test(test_chunked_reader ${BUILD_TEST_DIR}/test_chunked_reader) set_target_properties(test_chunked_reader PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${BUILD_TEST_DIR}) target_link_libraries(test_chunked_reader libfastafs) +add_executable(test_database test/database/test_database.cpp) +set_target_properties(test_database PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${BUILD_TEST_DIR}) +target_link_libraries(test_database libfastafs) +add_test(test_database ${BUILD_TEST_DIR}/test_database) + add_executable(test_fastafs_as_ucsc2bit test/fastafs/test_ucsc2bit.cpp) add_test(test_fastafs_as_ucsc2bit ${BUILD_TEST_DIR}/test_fastafs_as_ucsc2bit) set_target_properties(test_fastafs_as_ucsc2bit PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${BUILD_TEST_DIR}) diff --git a/include/database.hpp b/include/database.hpp index 98f44784..77d59ea9 100644 --- a/include/database.hpp +++ b/include/database.hpp @@ -10,12 +10,14 @@ class database { - std::string path; - std::string idx;// current default: ~/.local/share/fastafs/ + const std::string path; + const std::string idx;// current default: ~/.local/share/fastafs/ //hash_map idx;// "test": path + "/" + tostr(i) + ".fastafs" public: - database(); + database(const std::string &); + + const static std::string get_default_dir(); void force_db_exists(); std::string add(char *); diff --git a/src/database.cpp b/src/database.cpp index 1a57d150..cd6bb4f3 100644 --- a/src/database.cpp +++ b/src/database.cpp @@ -10,6 +10,20 @@ #include "fastafs.hpp" #include "lsfastafs.hpp" + + +const std::string database::get_default_dir() +{ + std::string home = std::string(getenv("HOME")); + if(home == "") + { + + } + return home + "/.local/share/fastafs"; +} + + + void database::force_db_exists() { DIR *dir = opendir(this->path.c_str()); @@ -34,9 +48,9 @@ void database::force_db_exists() -database::database() : - path(std::string(getenv("HOME")) + 
"/.local/share/fastafs"), - idx(std::string(getenv("HOME")) + "/.local/share/fastafs/index") +database::database(const std::string &path_arg) : + path(path_arg), + idx(path_arg + "/index") { this->load(); } diff --git a/src/fuse.cpp b/src/fuse.cpp index 8490ac32..8d59e8ee 100644 --- a/src/fuse.cpp +++ b/src/fuse.cpp @@ -702,7 +702,7 @@ fuse_instance *parse_args(int argc, char **argv, char **argv_fuse) size_t lastindex = name.find_last_of("."); name = name.substr(0, lastindex); } else { - database d = database(); + database d = database(database::get_default_dir()); fname = d.get(argv[mount_target_arg]); if(fname.size() == 0) { // invalid mount argument, don't bind fastafs object diff --git a/src/main.cpp b/src/main.cpp index a0330556..46df58e8 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -120,7 +120,7 @@ int main(int argc, char *argv[]) // reserve place in database std::string fname_out; if(to_cache) { - database d = database(); + database d = database(database::get_default_dir()); fname_out = d.add(argv[argc - 2]); } else { fname_out = std::string(argv[argc - 2]); @@ -196,7 +196,7 @@ int main(int argc, char *argv[]) if(from_file) { fname = std::string(argv[argc - 1]); } else { - database d = database(); + database d = database(database::get_default_dir()); fname = d.get(argv[argc - 1]); if(fname.size() == 0) { @@ -255,7 +255,7 @@ int main(int argc, char *argv[]) if(from_file) { fname = std::string(argv[argc - 1]); } else { - database d = database(); + database d = database(database::get_default_dir()); fname = d.get(argv[argc - 1]); if(fname.size() == 0) { @@ -274,7 +274,7 @@ int main(int argc, char *argv[]) } else if(strcmp(argv[1], "mount") == 0) { fuse(argc, argv); } else if(strcmp(argv[1], "list") == 0) { - database d = database(); + database d = database(database::get_default_dir()); d.list(); } else if(strcmp(argv[1], "ps") == 0) { std::unordered_multimap > fastafs_fuse_mounts = get_fastafs_processes(); @@ -304,7 +304,7 @@ int main(int argc, char *argv[]) 
if(from_file) { fname = std::string(argv[argc - 1]); } else { - database d = database(); + database d = database(database::get_default_dir()); fname = d.get(argv[argc - 1]); if(fname.size() == 0) { diff --git a/test/cache/test_cache.cpp b/test/cache/test_cache.cpp index 1d5b5086..97785a7c 100644 --- a/test/cache/test_cache.cpp +++ b/test/cache/test_cache.cpp @@ -1048,7 +1048,7 @@ BOOST_AUTO_TEST_CASE(test_cache_protein2) BOOST_CHECK_EQUAL(buffer[i], reference[i]); if(reference[i] != buffer[i]) { - printf("comparing char %u ** mismatch [ref] %d %02hhX != [buf] (%u x %02hhX)\n", i, reference[i], reference[i], buffer[i], (unsigned char) buffer[i], buffer[i]); + printf("comparing char %u ** mismatch [ref] %d %02hhX != [buf] (%u x %02hhX)\n", i, reference[i], reference[i], buffer[i], (unsigned char) buffer[i]); } } diff --git a/test/database/test_database.cpp b/test/database/test_database.cpp new file mode 100644 index 00000000..f190d89b --- /dev/null +++ b/test/database/test_database.cpp @@ -0,0 +1,24 @@ +#define BOOST_TEST_MODULE database + +#include + +#include "config.hpp" + +#include "database.hpp" + + +//#include +//#include + + +BOOST_AUTO_TEST_SUITE(Testing) + + +BOOST_AUTO_TEST_CASE(test_database__01) +{ + database d("."); +} + + + +BOOST_AUTO_TEST_SUITE_END() diff --git a/test/fivebit_fivebytes/test_fivebit_fivebytes.cpp b/test/fivebit_fivebytes/test_fivebit_fivebytes.cpp index bc56bd2d..0d3f73ce 100644 --- a/test/fivebit_fivebytes/test_fivebit_fivebytes.cpp +++ b/test/fivebit_fivebytes/test_fivebit_fivebytes.cpp @@ -99,7 +99,7 @@ BOOST_AUTO_TEST_CASE(test_fivebit_fivebytes_conversions) -BOOST_AUTO_TEST_CASE(test_dict_conv) +BOOST_AUTO_TEST_CASE(test_fivebit_fivebytes__dict_conversions) { char hash[255]; hash['A'] = 0; diff --git a/test/utils/test_utils.cpp b/test/utils/test_utils.cpp index 50dc2564..50fd919e 100644 --- a/test/utils/test_utils.cpp +++ b/test/utils/test_utils.cpp @@ -16,7 +16,7 @@ BOOST_AUTO_TEST_SUITE(Testing) /** * @description test contains 
a sequence that intially failed chunked_view with chunk size > 1 */ -BOOST_AUTO_TEST_CASE(test__twobytes_to_uint) +BOOST_AUTO_TEST_CASE(test_utils__twobytes_to_uint) { { // test: 00000000 00000000 = 0 From 00e689abf715c2e962f4db430fe7228bf5c2fe7a Mon Sep 17 00:00:00 2001 From: youri Date: Tue, 15 Feb 2022 21:57:19 +0100 Subject: [PATCH 09/65] updates to database --- include/database.hpp | 2 +- src/database.cpp | 22 ++++++++++++++++------ test/database/test_database.cpp | 17 ++++++++++++++++- tmp/database/index | 0 4 files changed, 33 insertions(+), 8 deletions(-) create mode 100644 tmp/database/index diff --git a/include/database.hpp b/include/database.hpp index 77d59ea9..06f16734 100644 --- a/include/database.hpp +++ b/include/database.hpp @@ -23,5 +23,5 @@ class database std::string add(char *); void load();// reads path + "/" + info.txt, only containing N void list();// 'ls' - std::string get(std::string); + std::string get(char *); }; diff --git a/src/database.cpp b/src/database.cpp index cd6bb4f3..f9afe153 100644 --- a/src/database.cpp +++ b/src/database.cpp @@ -5,6 +5,9 @@ #include #include #include +#include +#include +#include #include "database.hpp" #include "fastafs.hpp" @@ -14,12 +17,19 @@ const std::string database::get_default_dir() { - std::string home = std::string(getenv("HOME")); - if(home == "") + const char* home_c = getenv("HOME"); + if(home_c == nullptr) { - + struct passwd *pw = getpwuid(getuid()); + home_c = pw->pw_dir; + + if(home_c == nullptr) + { + throw std::runtime_error("Could not deterimine home dir. Also, no $HOME environment variable is set."); + } } - return home + "/.local/share/fastafs"; + std::string home_s = std::string(home_c); + return home_s + "/.local/share/fastafs"; } @@ -135,7 +145,7 @@ void database::list() // @todo return a filestream to a particular file one day? 
std::string database::add(char *name) { - if(this->get(std::string(name)) != "") + if(this->get(name) != "") { throw std::runtime_error("Trying to add duplicate entry to database."); } @@ -154,7 +164,7 @@ std::string database::add(char *name) /** * @brief searches for a filename that corresponds to the uid */ -std::string database::get(std::string fastafs_name_or_id) +std::string database::get(char *fastafs_name_or_id) { std::string fname = ""; std::ifstream infile(this->idx); diff --git a/test/database/test_database.cpp b/test/database/test_database.cpp index f190d89b..fe11086a 100644 --- a/test/database/test_database.cpp +++ b/test/database/test_database.cpp @@ -16,9 +16,24 @@ BOOST_AUTO_TEST_SUITE(Testing) BOOST_AUTO_TEST_CASE(test_database__01) { - database d("."); + const std::string default_dir_1 = database::get_default_dir(); + unsetenv("HOME"); + const std::string default_dir_2 = database::get_default_dir(); + + BOOST_REQUIRE(default_dir_1.size() > 0); + BOOST_REQUIRE(default_dir_2.size() > 0); + + BOOST_CHECK_EQUAL(default_dir_1, default_dir_2); + // printf("[%s]==[%s]\n", default_dir_1.c_str(), default_dir_2.c_str()); } +BOOST_AUTO_TEST_CASE(test_database__02) +{ + database d("tmp/database"); +} + + + BOOST_AUTO_TEST_SUITE_END() diff --git a/tmp/database/index b/tmp/database/index new file mode 100644 index 00000000..e69de29b From 25b1dde6cfdf9379cb54e742fe71eb6bfbd79bee Mon Sep 17 00:00:00 2001 From: yhoogstrate Date: Thu, 17 Feb 2022 09:14:47 +0100 Subject: [PATCH 10/65] sav --- include/fourbit_byte.hpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/fourbit_byte.hpp b/include/fourbit_byte.hpp index f6decccf..791c47a0 100644 --- a/include/fourbit_byte.hpp +++ b/include/fourbit_byte.hpp @@ -17,8 +17,8 @@ class fourbit_byte static const char n_fill_masked = '-'; static const unsigned char bits_per_nucleotide = 4; - static const char nucleotides_per_byte = 8 / bits_per_nucleotide ; // this is about compressed data - static 
const char nucleotides_per_chunk = 8 / bits_per_nucleotide ; // this is about decompressed chunks + static const char nucleotides_per_byte = 8 / fourbit_byte::bits_per_nucleotide ; // this is about compressed data + static const char nucleotides_per_chunk = 8 / fourbit_byte::bits_per_nucleotide ; // this is about decompressed chunks unsigned char data; void set(unsigned char, unsigned char); @@ -31,6 +31,8 @@ class fourbit_byte static const off_t nucleotides_to_compressed_fileoffset(size_t); // file offset waarna gelezen kan worden static const off_t nucleotides_to_compressed_offset(size_t);// aantal bytes nodig om zoveel data weg te schrijven + //@todo chunked reader should be in a function above this. + //next(char *) should be implemented with decompressed content only void next(chunked_reader &); // update the compressed data and set buffer to decompressed data }; From 139c74eeee1df2c9f6ae40a29c08e672625c1155 Mon Sep 17 00:00:00 2001 From: yhoogstrate Date: Thu, 17 Feb 2022 09:46:25 +0100 Subject: [PATCH 11/65] improvements in OOP structure --- CMakeLists.txt | 3 +++ include/fourbit_byte.hpp | 10 ++++--- include/twobit_byte.hpp | 16 +++++++----- include/xbit_byte_encoder.hpp | 49 +++++++++++++++++++++++++++++++++++ src/xbit_byte_encoder.cpp | 8 ++++++ 5 files changed, 75 insertions(+), 11 deletions(-) create mode 100644 include/xbit_byte_encoder.hpp create mode 100644 src/xbit_byte_encoder.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 56ab9fce..5b1587cf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -128,9 +128,12 @@ add_library(libfastafs SHARED src/flags.cpp src/fastafs.cpp src/ucsc2bit.cpp + + src/xbit_byte_encoder.cpp src/twobit_byte.cpp src/fourbit_byte.cpp src/fivebit_fivebytes.cpp + src/database.cpp src/utils.cpp src/sequence_region.cpp diff --git a/include/fourbit_byte.hpp b/include/fourbit_byte.hpp index 791c47a0..86bb34e1 100644 --- a/include/fourbit_byte.hpp +++ b/include/fourbit_byte.hpp @@ -6,17 +6,19 @@ #include "config.hpp" #include 
"chunked_reader.hpp" +#include "xbit_byte_encoder.hpp" -class fourbit_byte +class fourbit_byte : xbit_byte_encoder { public: + static const char xbit_byte_encoder::n_fill_unmasked = '-'; + static const char xbit_byte_encoder::n_fill_masked = '-'; + static const unsigned char xbit_byte_encoder::bits_per_nucleotide = 4; + static const char fourbit_alhpabet[17]; static char encode_hash[256][3]; - static const char n_fill_unmasked = '-'; - static const char n_fill_masked = '-'; - static const unsigned char bits_per_nucleotide = 4; static const char nucleotides_per_byte = 8 / fourbit_byte::bits_per_nucleotide ; // this is about compressed data static const char nucleotides_per_chunk = 8 / fourbit_byte::bits_per_nucleotide ; // this is about decompressed chunks diff --git a/include/twobit_byte.hpp b/include/twobit_byte.hpp index 798bee8c..ab424041 100644 --- a/include/twobit_byte.hpp +++ b/include/twobit_byte.hpp @@ -6,22 +6,24 @@ #include "config.hpp" #include "chunked_reader.hpp" +#include "xbit_byte_encoder.hpp" - -class twobit_byte +class twobit_byte : xbit_byte_encoder { private: // things only needed by the compression [encoding, not decoding] public: + static const char xbit_byte_encoder::n_fill_unmasked = 'N'; + static const char xbit_byte_encoder::n_fill_masked = 'n'; + static const unsigned char xbit_byte_encoder::bits_per_nucleotide = 2; + char (&encode_hash)[256][5]; twobit_byte(char (&encode_hash_arg)[256][5]): encode_hash(encode_hash_arg) {}; - static const char n_fill_unmasked = 'N'; - static const char n_fill_masked = 'n'; - static const unsigned char bits_per_nucleotide = 2; - static const char nucleotides_per_byte = 8 / bits_per_nucleotide ; // this is about compressed data - static const char nucleotides_per_chunk = 8 / bits_per_nucleotide ; // this is about decompressed chunks + + static const char nucleotides_per_byte = 8 / twobit_byte::bits_per_nucleotide ; // this is about compressed data + static const char nucleotides_per_chunk = 8 / 
twobit_byte::bits_per_nucleotide ; // this is about decompressed chunks unsigned char data; // go private void set(unsigned char, unsigned char); diff --git a/include/xbit_byte_encoder.hpp b/include/xbit_byte_encoder.hpp new file mode 100644 index 00000000..6599a328 --- /dev/null +++ b/include/xbit_byte_encoder.hpp @@ -0,0 +1,49 @@ + +#ifndef XBIT_BYTE_HPP +#define XBIT_BYTE_HPP + +#include +#include "config.hpp" + +#include "chunked_reader.hpp" + + + +class xbit_byte_encoder +{ +private: + +public: + // these members need to be overwritten by parental classes + static const char n_fill_unmasked; + static const char n_fill_masked; + + static const unsigned char bits_per_nucleotide; + + xbit_byte_encoder() {}; + +/* + char (&encode_hash)[256][5]; + twobit_byte(char (&encode_hash_arg)[256][5]): encode_hash(encode_hash_arg) {}; + + unsigned char data; // go private + void set(unsigned char, unsigned char); + void set(char*);// string met 4 bytes set + char *get(void); + char *get(unsigned char); + + static unsigned char iterator_to_offset(unsigned int); + + static const off_t nucleotides_to_compressed_fileoffset(size_t); // file offset waarna gelezen kan worden + static const off_t nucleotides_to_compressed_offset(size_t);// aantal bytes nodig om zoveel data weg te schrijven + + void next(chunked_reader &); // update the compressed data + */ +}; + + + + + + +#endif diff --git a/src/xbit_byte_encoder.cpp b/src/xbit_byte_encoder.cpp new file mode 100644 index 00000000..c4f869c5 --- /dev/null +++ b/src/xbit_byte_encoder.cpp @@ -0,0 +1,8 @@ +#include +#include + +#include "config.hpp" + +#include "xbit_byte_encoder.hpp" + + From f2ac64f28145242aa75eed2780e77f39c6a88338 Mon Sep 17 00:00:00 2001 From: yhoogstrate Date: Fri, 18 Feb 2022 12:51:22 +0100 Subject: [PATCH 12/65] removal of compiler warnings --- include/fastafs.hpp | 2 +- include/fivebit_fivebytes.hpp | 4 ++-- include/fourbit_byte.hpp | 4 ++-- include/twobit_byte.hpp | 4 ++-- src/fastafs.cpp | 14 +++++--------- 
src/fivebit_fivebytes.cpp | 4 ++-- src/fourbit_byte.cpp | 2 +- src/twobit_byte.cpp | 4 ++-- 8 files changed, 17 insertions(+), 21 deletions(-) diff --git a/include/fastafs.hpp b/include/fastafs.hpp index b423a2cd..91d89e9b 100644 --- a/include/fastafs.hpp +++ b/include/fastafs.hpp @@ -78,7 +78,7 @@ class fastafs_seq uint32_t fasta_filesize(uint32_t padding); void view_fasta(ffs2f_init_seq*, chunked_reader &fh); - size_t view_sequence_region_size(ffs2f_init_seq*, sequence_region*, std::ifstream *); + size_t view_sequence_region_size(sequence_region*); uint32_t view_sequence_region(ffs2f_init_seq*, sequence_region*, char *, size_t, off_t, chunked_reader &); uint32_t view_fasta_chunk(ffs2f_init_seq*, char *, size_t, off_t, chunked_reader &); template uint32_t view_fasta_chunk_generalized(ffs2f_init_seq*, char *, size_t, off_t, chunked_reader &); diff --git a/include/fivebit_fivebytes.hpp b/include/fivebit_fivebytes.hpp index 7240f6a8..d2bc46e0 100644 --- a/include/fivebit_fivebytes.hpp +++ b/include/fivebit_fivebytes.hpp @@ -39,8 +39,8 @@ class fivebit_fivebytes static unsigned char iterator_to_offset(unsigned int); static unsigned char decompressed_to_compressed_bytes(unsigned char); // when only 5/8 bytes are filled, only 4/5 bytes need to be written - static const off_t nucleotides_to_compressed_fileoffset(size_t); // file offset waarna gelezen kan worden - static const off_t nucleotides_to_compressed_offset(size_t);// aantal bytes nodig om zoveel data weg te schrijven + static off_t nucleotides_to_compressed_fileoffset(size_t); // file offset waarna gelezen kan worden + static off_t nucleotides_to_compressed_offset(size_t);// aantal bytes nodig om zoveel data weg te schrijven void next(chunked_reader &); // update the compressed data and set buffer to decompressed data diff --git a/include/fourbit_byte.hpp b/include/fourbit_byte.hpp index 86bb34e1..1ec17540 100644 --- a/include/fourbit_byte.hpp +++ b/include/fourbit_byte.hpp @@ -30,8 +30,8 @@ class fourbit_byte : 
xbit_byte_encoder static unsigned char iterator_to_offset(unsigned int); - static const off_t nucleotides_to_compressed_fileoffset(size_t); // file offset waarna gelezen kan worden - static const off_t nucleotides_to_compressed_offset(size_t);// aantal bytes nodig om zoveel data weg te schrijven + static off_t nucleotides_to_compressed_fileoffset(size_t); // file offset waarna gelezen kan worden + //static off_t nucleotides_to_compressed_offset(size_t);// aantal bytes nodig om zoveel data weg te schrijven //@todo chunked reader should be in a function above this. //next(char *) should be implemented with decompressed content only diff --git a/include/twobit_byte.hpp b/include/twobit_byte.hpp index ab424041..9da7ad71 100644 --- a/include/twobit_byte.hpp +++ b/include/twobit_byte.hpp @@ -33,8 +33,8 @@ class twobit_byte : xbit_byte_encoder static unsigned char iterator_to_offset(unsigned int); - static const off_t nucleotides_to_compressed_fileoffset(size_t); // file offset waarna gelezen kan worden - static const off_t nucleotides_to_compressed_offset(size_t);// aantal bytes nodig om zoveel data weg te schrijven + static off_t nucleotides_to_compressed_fileoffset(size_t); // file offset waarna gelezen kan worden + static off_t nucleotides_to_compressed_offset(size_t);// aantal bytes nodig om zoveel data weg te schrijven void next(chunked_reader &); // update the compressed data }; diff --git a/src/fastafs.cpp b/src/fastafs.cpp index e314a2d1..3248b88e 100644 --- a/src/fastafs.cpp +++ b/src/fastafs.cpp @@ -226,7 +226,7 @@ template inline uint32_t fastafs_seq::view_fasta_chunk_generalized( pos += (uint32_t) copied; } - const uint32_t offset_from_sequence_line = pos - pos_limit; + const uint32_t offset_from_sequence_line = (uint32_t) (pos - pos_limit); size_t n_block = cache->n_starts.size(); size_t m_block = cache->m_starts.size(); uint32_t newlines_passed = offset_from_sequence_line / (cache->padding + 1);// number of newlines passed (within the sequence part) @@ 
-348,17 +348,12 @@ template inline uint32_t fastafs_seq::view_fasta_chunk_generalized( -size_t fastafs_seq::view_sequence_region_size(ffs2f_init_seq* cache, sequence_region* sr, std::ifstream *fh) +size_t fastafs_seq::view_sequence_region_size(sequence_region* sr) { #if DEBUG - if(cache == nullptr) { - throw std::invalid_argument("fastafs_seq::view_sequence_region - error 01\n"); - } - if(sr == nullptr) { throw std::invalid_argument("fastafs_seq::view_sequence_region - error 02\n"); } - #endif @@ -707,7 +702,8 @@ void fastafs::load(std::string afilename) // name size_t namesize = (unsigned char) memblock[0]; // cast to something that is large enough (> 128) - char name[namesize + 1]; + //char name[namesize + 1]; + char *name = new char[namesize + 1]; fh_in.read(name, namesize); name[(unsigned char) memblock[0]] = '\0'; s->name = std::string(name); @@ -851,7 +847,7 @@ size_t fastafs::view_sequence_region_size(ffs2f_init* cache, const char *seq_reg // 02 : check if 'chr' is equals this->data[i].name for(size_t i = 0; i < this->data.size(); i++) { if(sr.get_seq_name().compare(this->data[i]->name) == 0) { - return this->data[i]->view_sequence_region_size(cache->sequences[i], &sr, &file); + return this->data[i]->view_sequence_region_size(&sr); } } } diff --git a/src/fivebit_fivebytes.cpp b/src/fivebit_fivebytes.cpp index 98f945c0..85e1de11 100644 --- a/src/fivebit_fivebytes.cpp +++ b/src/fivebit_fivebytes.cpp @@ -242,7 +242,7 @@ unsigned char fivebit_fivebytes::decompressed_to_compressed_bytes(unsigned char * >Seq * [ABCDEFGH][ABCDEFGH][ACCCAAC] has offset of 2? * */ -const off_t fivebit_fivebytes::nucleotides_to_compressed_fileoffset(size_t n_amino_acids) +off_t fivebit_fivebytes::nucleotides_to_compressed_fileoffset(size_t n_amino_acids) { off_t out = n_amino_acids / (off_t) fivebit_fivebytes::nucleotides_per_chunk; @@ -263,7 +263,7 @@ const off_t fivebit_fivebytes::nucleotides_to_compressed_fileoffset(size_t n_ami * [ABCDEFGH][ABCDEFGH][A] has offset of 11? 
* */ -const off_t fivebit_fivebytes::nucleotides_to_compressed_offset(size_t n_amino_acids) +off_t fivebit_fivebytes::nucleotides_to_compressed_offset(size_t n_amino_acids) { return fivebit_fivebytes::nucleotides_to_compressed_fileoffset(n_amino_acids) + fivebit_fivebytes::decompressed_to_compressed_bytes(n_amino_acids % fivebit_fivebytes::nucleotides_per_chunk); diff --git a/src/fourbit_byte.cpp b/src/fourbit_byte.cpp index 1f0a3f49..38d95c8a 100644 --- a/src/fourbit_byte.cpp +++ b/src/fourbit_byte.cpp @@ -263,7 +263,7 @@ char *fourbit_byte::get() * dit is naar beneden afgerond zodat de file pointer ervoor start * * */ -const off_t fourbit_byte::nucleotides_to_compressed_fileoffset(size_t n_nucleotides) +off_t fourbit_byte::nucleotides_to_compressed_fileoffset(size_t n_nucleotides) { return (off_t) n_nucleotides / fourbit_byte::nucleotides_per_byte; } diff --git a/src/twobit_byte.cpp b/src/twobit_byte.cpp index 7d4bc83c..0b9f66ca 100644 --- a/src/twobit_byte.cpp +++ b/src/twobit_byte.cpp @@ -145,12 +145,12 @@ char *twobit_byte::get() * >Seq * [ACTG][ACTG][AC] has offset of 2 (or 3)? 
* */ -const off_t twobit_byte::nucleotides_to_compressed_fileoffset(size_t n_nucleotides) +off_t twobit_byte::nucleotides_to_compressed_fileoffset(size_t n_nucleotides) { return (off_t) n_nucleotides / twobit_byte::nucleotides_per_byte; } -const off_t twobit_byte::nucleotides_to_compressed_offset(size_t n_nucleotides) +off_t twobit_byte::nucleotides_to_compressed_offset(size_t n_nucleotides) { return twobit_byte::nucleotides_to_compressed_fileoffset(n_nucleotides + twobit_byte::nucleotides_per_byte - 1); } From 6460d02400572614413cb2a0b508e9a18d9fe604 Mon Sep 17 00:00:00 2001 From: yhoogstrate Date: Fri, 18 Feb 2022 12:56:55 +0100 Subject: [PATCH 13/65] remove another compiler warning --- test/fivebit_fivebytes/test_fivebit_fivebytes.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/fivebit_fivebytes/test_fivebit_fivebytes.cpp b/test/fivebit_fivebytes/test_fivebit_fivebytes.cpp index 0d3f73ce..6a9605d4 100644 --- a/test/fivebit_fivebytes/test_fivebit_fivebytes.cpp +++ b/test/fivebit_fivebytes/test_fivebit_fivebytes.cpp @@ -141,7 +141,7 @@ BOOST_AUTO_TEST_CASE(test_fivebit_fivebytes__dict_conversions) for(size_t i = 0; i < dict.size(); i++) { // set and compress amino acid string for(size_t j = 0 ; j < 8; j ++) { - f.set((unsigned char) j, hash[ dict[i][j] ]) ; + f.set((unsigned char) j, (unsigned char) hash[ (unsigned char) dict[i][j] ]) ; } // decompress From 49133db57e34a7d41ad78814ba8fd71ee1fc34ca Mon Sep 17 00:00:00 2001 From: yhoogstrate Date: Fri, 18 Feb 2022 13:33:19 +0100 Subject: [PATCH 14/65] removal of unused arguments and better OOP structure --- include/fastafs.hpp | 2 +- include/fourbit_byte.hpp | 5 ++-- include/twobit_byte.hpp | 5 ++-- include/xbit_byte_encoder.hpp | 4 ++- src/fastafs.cpp | 16 +--------- src/fourbit_byte.cpp | 8 +---- src/fuse.cpp | 2 +- src/twobit_byte.cpp | 7 +---- src/ucsc2bit.cpp | 3 +- src/xbit_byte_encoder.cpp | 6 ++++ test/fastafs/test_fastafs.cpp | 56 +++++++++++++++++------------------ 11 files 
changed, 48 insertions(+), 66 deletions(-) diff --git a/include/fastafs.hpp b/include/fastafs.hpp index 91d89e9b..92b00b51 100644 --- a/include/fastafs.hpp +++ b/include/fastafs.hpp @@ -126,7 +126,7 @@ class fastafs void load(std::string); void view_fasta(ffs2f_init*); - size_t view_sequence_region_size(ffs2f_init*, const char *); // read stuff like "chr1:123-456" into the buffer + size_t view_sequence_region_size(const char *); // read stuff like "chr1:123-456" into the buffer uint32_t view_sequence_region(ffs2f_init*, const char *, char*, size_t, off_t); // read stuff like "chr1:123-456" into the buffer uint32_t view_fasta_chunk(ffs2f_init*, char*, size_t, off_t, chunked_reader &); uint32_t view_fasta_chunk(ffs2f_init*, char*, size_t, off_t); diff --git a/include/fourbit_byte.hpp b/include/fourbit_byte.hpp index 1ec17540..97ca88c7 100644 --- a/include/fourbit_byte.hpp +++ b/include/fourbit_byte.hpp @@ -9,7 +9,7 @@ #include "xbit_byte_encoder.hpp" -class fourbit_byte : xbit_byte_encoder +class fourbit_byte : public xbit_byte_encoder { public: static const char xbit_byte_encoder::n_fill_unmasked = '-'; @@ -22,7 +22,6 @@ class fourbit_byte : xbit_byte_encoder static const char nucleotides_per_byte = 8 / fourbit_byte::bits_per_nucleotide ; // this is about compressed data static const char nucleotides_per_chunk = 8 / fourbit_byte::bits_per_nucleotide ; // this is about decompressed chunks - unsigned char data; void set(unsigned char, unsigned char); void set(char*);// string met 4 bytes set char *get(void); @@ -35,7 +34,7 @@ class fourbit_byte : xbit_byte_encoder //@todo chunked reader should be in a function above this. 
//next(char *) should be implemented with decompressed content only - void next(chunked_reader &); // update the compressed data and set buffer to decompressed data + //void next(chunked_reader &); // update the compressed data and set buffer to decompressed data }; #endif diff --git a/include/twobit_byte.hpp b/include/twobit_byte.hpp index 9da7ad71..6e37a73d 100644 --- a/include/twobit_byte.hpp +++ b/include/twobit_byte.hpp @@ -9,7 +9,7 @@ #include "xbit_byte_encoder.hpp" -class twobit_byte : xbit_byte_encoder +class twobit_byte : public xbit_byte_encoder { private: // things only needed by the compression [encoding, not decoding] @@ -25,7 +25,6 @@ class twobit_byte : xbit_byte_encoder static const char nucleotides_per_byte = 8 / twobit_byte::bits_per_nucleotide ; // this is about compressed data static const char nucleotides_per_chunk = 8 / twobit_byte::bits_per_nucleotide ; // this is about decompressed chunks - unsigned char data; // go private void set(unsigned char, unsigned char); void set(char*);// string met 4 bytes set char *get(void); @@ -36,7 +35,7 @@ class twobit_byte : xbit_byte_encoder static off_t nucleotides_to_compressed_fileoffset(size_t); // file offset waarna gelezen kan worden static off_t nucleotides_to_compressed_offset(size_t);// aantal bytes nodig om zoveel data weg te schrijven - void next(chunked_reader &); // update the compressed data + //void next(chunked_reader &); // update the compressed data }; diff --git a/include/xbit_byte_encoder.hpp b/include/xbit_byte_encoder.hpp index 6599a328..9ab30c65 100644 --- a/include/xbit_byte_encoder.hpp +++ b/include/xbit_byte_encoder.hpp @@ -20,6 +20,8 @@ class xbit_byte_encoder static const unsigned char bits_per_nucleotide; + unsigned char data; // go private + xbit_byte_encoder() {}; /* @@ -36,9 +38,9 @@ class xbit_byte_encoder static const off_t nucleotides_to_compressed_fileoffset(size_t); // file offset waarna gelezen kan worden static const off_t nucleotides_to_compressed_offset(size_t);// 
aantal bytes nodig om zoveel data weg te schrijven + */ void next(chunked_reader &); // update the compressed data - */ }; diff --git a/src/fastafs.cpp b/src/fastafs.cpp index 3248b88e..7d5fdd0d 100644 --- a/src/fastafs.cpp +++ b/src/fastafs.cpp @@ -823,22 +823,8 @@ ffs2f_init* fastafs::init_ffs2f(uint32_t padding, bool allow_masking) // estimates the whole file size of a file such as "/seq/chr1:56-" -size_t fastafs::view_sequence_region_size(ffs2f_init* cache, const char *seq_region_arg) +size_t fastafs::view_sequence_region_size(const char *seq_region_arg) { -#if DEBUG - if(cache == nullptr) { - throw std::invalid_argument("fastafs::view_sequence_region - error 01\n"); - } - - if(cache->padding_arg != 0) { - throw std::invalid_argument("fastafs::view_sequence_region - error 02\n"); - } - - if(cache->sequences.size() == 0) { - throw std::invalid_argument("fastafs::view_sequence_region - error 03\n"); - } -#endif - std::ifstream file(this->filename.c_str(), std::ios::in | std::ios::binary | std::ios::ate); if(file.is_open()) { // parse "chr..:..-.." string diff --git a/src/fourbit_byte.cpp b/src/fourbit_byte.cpp index 38d95c8a..1cce4422 100644 --- a/src/fourbit_byte.cpp +++ b/src/fourbit_byte.cpp @@ -134,7 +134,7 @@ void fourbit_byte::set(unsigned char bit_offset, unsigned char nucleotide) break; #endif //DEBUG } -}; +} // input char "AACCCTTGG" @@ -271,9 +271,3 @@ off_t fourbit_byte::nucleotides_to_compressed_fileoffset(size_t n_nucleotides) -void fourbit_byte::next(chunked_reader &r) -{ - this->data = r.read(); -} - - diff --git a/src/fuse.cpp b/src/fuse.cpp index 8d59e8ee..a9dd99e9 100644 --- a/src/fuse.cpp +++ b/src/fuse.cpp @@ -116,7 +116,7 @@ static int do_getattr(const char *path, struct stat *st) st->st_nlink = 1; //@todo this needs to be defined with some api stuff:!! 
- st->st_size = (signed int) ffi->f->view_sequence_region_size(ffi->cache_p0, (strchr(path, '/') + 5)); + st->st_size = (signed int) ffi->f->view_sequence_region_size( (strchr(path, '/') + 5)); } else { st->st_mode = S_IFREG | 0444; st->st_nlink = 1; diff --git a/src/twobit_byte.cpp b/src/twobit_byte.cpp index 0b9f66ca..c552803d 100644 --- a/src/twobit_byte.cpp +++ b/src/twobit_byte.cpp @@ -66,7 +66,7 @@ void twobit_byte::set(unsigned char bit_offset, unsigned char nucleotide) break; #endif //DEBUG } -}; +} // input char "AACCCTTGG" @@ -158,10 +158,5 @@ off_t twobit_byte::nucleotides_to_compressed_offset(size_t n_nucleotides) -// needs to be separate function because not encodings read byte-per-byte -void twobit_byte::next(chunked_reader &r) -{ - this->data = r.read(); -} diff --git a/src/ucsc2bit.cpp b/src/ucsc2bit.cpp index 922c961e..0c298c92 100644 --- a/src/ucsc2bit.cpp +++ b/src/ucsc2bit.cpp @@ -299,7 +299,8 @@ void ucsc2bit::load(std::string afilename) } // name - char name[memblock[0] + 1]; + //char name[memblock[0] + 1]; + char *name = new char[memblock[0] + 1]; if(!file.read(name, memblock[0])) { delete[] memblock; throw std::invalid_argument("Corrupt, unreadable or truncated file (early EOF): " + filename); diff --git a/src/xbit_byte_encoder.cpp b/src/xbit_byte_encoder.cpp index c4f869c5..90e9955d 100644 --- a/src/xbit_byte_encoder.cpp +++ b/src/xbit_byte_encoder.cpp @@ -6,3 +6,9 @@ #include "xbit_byte_encoder.hpp" + +void xbit_byte_encoder::next(chunked_reader &r) +{ + this->data = r.read(); +} + diff --git a/test/fastafs/test_fastafs.cpp b/test/fastafs/test_fastafs.cpp index 664a8916..3dc187d2 100644 --- a/test/fastafs/test_fastafs.cpp +++ b/test/fastafs/test_fastafs.cpp @@ -391,7 +391,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs__sequence_virtualization) buffer = new char[READ_BUFFER_SIZE + 1]; flush_buffer(buffer, READ_BUFFER_SIZE, '\0'); - written = fs.view_sequence_region_size(cache_p0, (strchr(arg, '/') + 5)); + written = 
fs.view_sequence_region_size((strchr(arg, '/') + 5)); BOOST_CHECK_EQUAL(written, 1); written = fs.view_sequence_region(cache_p0, (strchr(arg, '/') + 5), buffer, READ_BUFFER_SIZE, 0); @@ -408,7 +408,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs__sequence_virtualization) buffer = new char[READ_BUFFER_SIZE + 1]; flush_buffer(buffer, READ_BUFFER_SIZE, '\0'); - written = fs.view_sequence_region_size(cache_p0, (strchr(arg, '/') + 5)); + written = fs.view_sequence_region_size((strchr(arg, '/') + 5)); BOOST_CHECK_EQUAL(written, 1); written = fs.view_sequence_region(cache_p0, (strchr(arg, '/') + 5), buffer, READ_BUFFER_SIZE, 0); @@ -427,7 +427,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs__sequence_virtualization) buffer = new char[READ_BUFFER_SIZE + 1]; flush_buffer(buffer, READ_BUFFER_SIZE, '\0'); - written = fs.view_sequence_region_size(cache_p0, (strchr(arg, '/') + 5)); + written = fs.view_sequence_region_size((strchr(arg, '/') + 5)); BOOST_CHECK_EQUAL(written, 1); written = fs.view_sequence_region(cache_p0, (strchr(arg, '/') + 5), buffer, READ_BUFFER_SIZE, 0); @@ -446,7 +446,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs__sequence_virtualization) buffer = new char[READ_BUFFER_SIZE + 1]; flush_buffer(buffer, READ_BUFFER_SIZE, '\0'); - written = fs.view_sequence_region_size(cache_p0, (strchr(arg, '/') + 5)); + written = fs.view_sequence_region_size((strchr(arg, '/') + 5)); BOOST_CHECK_EQUAL(written, 1); written = fs.view_sequence_region(cache_p0, (strchr(arg, '/') + 5), buffer, READ_BUFFER_SIZE, 0); @@ -465,7 +465,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs__sequence_virtualization) buffer = new char[READ_BUFFER_SIZE + 1]; flush_buffer(buffer, READ_BUFFER_SIZE, '\0'); - written = fs.view_sequence_region_size(cache_p0, (strchr(arg, '/') + 5)); + written = fs.view_sequence_region_size((strchr(arg, '/') + 5)); BOOST_CHECK_EQUAL(written, 0); written = fs.view_sequence_region(cache_p0, (strchr(arg, '/') + 5), buffer, READ_BUFFER_SIZE, 0); @@ -486,7 +486,7 @@ 
BOOST_AUTO_TEST_CASE(test_fastafs__sequence_virtualization) buffer = new char[READ_BUFFER_SIZE + 1]; flush_buffer(buffer, READ_BUFFER_SIZE, '\0'); - written = fs.view_sequence_region_size(cache_p0, (strchr(arg, '/') + 5)); + written = fs.view_sequence_region_size((strchr(arg, '/') + 5)); BOOST_CHECK_EQUAL(written, 1); written = fs.view_sequence_region(cache_p0, (strchr(arg, '/') + 5), buffer, READ_BUFFER_SIZE, 0); @@ -506,7 +506,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs__sequence_virtualization) buffer = new char[READ_BUFFER_SIZE + 1]; flush_buffer(buffer, READ_BUFFER_SIZE, '\0'); - written = fs.view_sequence_region_size(cache_p0, (strchr(arg, '/') + 5)); + written = fs.view_sequence_region_size((strchr(arg, '/') + 5)); BOOST_CHECK_EQUAL(written, 1); written = fs.view_sequence_region(cache_p0, (strchr(arg, '/') + 5), buffer, READ_BUFFER_SIZE, 0); @@ -525,7 +525,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs__sequence_virtualization) buffer = new char[READ_BUFFER_SIZE + 1]; flush_buffer(buffer, READ_BUFFER_SIZE, '\0'); - written = fs.view_sequence_region_size(cache_p0, (strchr(arg, '/') + 5)); + written = fs.view_sequence_region_size((strchr(arg, '/') + 5)); BOOST_CHECK_EQUAL(written, 1); written = fs.view_sequence_region(cache_p0, (strchr(arg, '/') + 5), buffer, READ_BUFFER_SIZE, 0); @@ -545,7 +545,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs__sequence_virtualization) buffer = new char[READ_BUFFER_SIZE + 1]; flush_buffer(buffer, READ_BUFFER_SIZE, '\0'); - written = fs.view_sequence_region_size(cache_p0, (strchr(arg, '/') + 5)); + written = fs.view_sequence_region_size((strchr(arg, '/') + 5)); BOOST_CHECK_EQUAL(written, 1); written = fs.view_sequence_region(cache_p0, (strchr(arg, '/') + 5), buffer, READ_BUFFER_SIZE, 0); @@ -564,7 +564,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs__sequence_virtualization) buffer = new char[READ_BUFFER_SIZE + 1]; flush_buffer(buffer, READ_BUFFER_SIZE, '\0'); - written = fs.view_sequence_region_size(cache_p0, (strchr(arg, '/') + 5)); + written = 
fs.view_sequence_region_size((strchr(arg, '/') + 5)); BOOST_CHECK_EQUAL(written, 1); written = fs.view_sequence_region(cache_p0, (strchr(arg, '/') + 5), buffer, READ_BUFFER_SIZE, 0); @@ -583,7 +583,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs__sequence_virtualization) buffer = new char[READ_BUFFER_SIZE + 1]; flush_buffer(buffer, READ_BUFFER_SIZE, '\0'); - written = fs.view_sequence_region_size(cache_p0, (strchr(arg, '/') + 5)); + written = fs.view_sequence_region_size((strchr(arg, '/') + 5)); BOOST_CHECK_EQUAL(written, 1); written = fs.view_sequence_region(cache_p0, (strchr(arg, '/') + 5), buffer, READ_BUFFER_SIZE, 0); @@ -602,7 +602,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs__sequence_virtualization) buffer = new char[READ_BUFFER_SIZE + 1]; flush_buffer(buffer, READ_BUFFER_SIZE, '\0'); - written = fs.view_sequence_region_size(cache_p0, (strchr(arg, '/') + 5)); + written = fs.view_sequence_region_size((strchr(arg, '/') + 5)); BOOST_CHECK_EQUAL(written, 1); written = fs.view_sequence_region(cache_p0, (strchr(arg, '/') + 5), buffer, READ_BUFFER_SIZE, 0); @@ -622,7 +622,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs__sequence_virtualization) buffer = new char[READ_BUFFER_SIZE + 1]; flush_buffer(buffer, READ_BUFFER_SIZE, '\0'); - written = fs.view_sequence_region_size(cache_p0, (strchr(arg, '/') + 5)); + written = fs.view_sequence_region_size((strchr(arg, '/') + 5)); BOOST_CHECK_EQUAL(written, 1); @@ -643,7 +643,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs__sequence_virtualization) buffer = new char[READ_BUFFER_SIZE + 1]; flush_buffer(buffer, READ_BUFFER_SIZE, '\0'); - written = fs.view_sequence_region_size(cache_p0, (strchr(arg, '/') + 5)); + written = fs.view_sequence_region_size((strchr(arg, '/') + 5)); BOOST_CHECK_EQUAL(written, 1); written = fs.view_sequence_region(cache_p0, (strchr(arg, '/') + 5), buffer, READ_BUFFER_SIZE, 0); @@ -663,7 +663,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs__sequence_virtualization) buffer = new char[READ_BUFFER_SIZE + 1]; flush_buffer(buffer, READ_BUFFER_SIZE, 
'\0'); - written = fs.view_sequence_region_size(cache_p0, (strchr(arg, '/') + 5)); + written = fs.view_sequence_region_size((strchr(arg, '/') + 5)); BOOST_CHECK_EQUAL(written, 2); written = fs.view_sequence_region(cache_p0, (strchr(arg, '/') + 5), buffer, READ_BUFFER_SIZE, 0); @@ -684,7 +684,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs__sequence_virtualization) buffer = new char[READ_BUFFER_SIZE + 1]; flush_buffer(buffer, READ_BUFFER_SIZE, '\0'); - written = fs.view_sequence_region_size(cache_p0, (strchr(arg, '/') + 5)); + written = fs.view_sequence_region_size((strchr(arg, '/') + 5)); BOOST_CHECK_EQUAL(written, 2); written = fs.view_sequence_region(cache_p0, (strchr(arg, '/') + 5), buffer, READ_BUFFER_SIZE, 0); @@ -705,7 +705,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs__sequence_virtualization) buffer = new char[READ_BUFFER_SIZE + 1]; flush_buffer(buffer, READ_BUFFER_SIZE, '\0'); - written = fs.view_sequence_region_size(cache_p0, (strchr(arg, '/') + 5)); + written = fs.view_sequence_region_size((strchr(arg, '/') + 5)); BOOST_CHECK_EQUAL(written, 0); written = fs.view_sequence_region(cache_p0, (strchr(arg, '/') + 5), buffer, READ_BUFFER_SIZE, 0); @@ -725,7 +725,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs__sequence_virtualization) buffer = new char[READ_BUFFER_SIZE + 1]; flush_buffer(buffer, READ_BUFFER_SIZE, '\0'); - written = fs.view_sequence_region_size(cache_p0, (strchr(arg, '/') + 5)); + written = fs.view_sequence_region_size((strchr(arg, '/') + 5)); BOOST_CHECK_EQUAL(written, 8); written = fs.view_sequence_region(cache_p0, (strchr(arg, '/') + 5), buffer, READ_BUFFER_SIZE, 0); @@ -753,7 +753,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs__sequence_virtualization) buffer = new char[READ_BUFFER_SIZE + 1]; flush_buffer(buffer, READ_BUFFER_SIZE, '\0'); - written = fs.view_sequence_region_size(cache_p0, (strchr(arg, '/') + 5)); + written = fs.view_sequence_region_size((strchr(arg, '/') + 5)); BOOST_CHECK_EQUAL(written, 4); written = fs.view_sequence_region(cache_p0, (strchr(arg, '/') + 5), 
buffer, READ_BUFFER_SIZE, 0); @@ -777,7 +777,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs__sequence_virtualization) buffer = new char[READ_BUFFER_SIZE + 1]; flush_buffer(buffer, READ_BUFFER_SIZE, '\0'); - written = fs.view_sequence_region_size(cache_p0, (strchr(arg, '/') + 5)); + written = fs.view_sequence_region_size((strchr(arg, '/') + 5)); BOOST_CHECK_EQUAL(written, 2); written = fs.view_sequence_region(cache_p0, (strchr(arg, '/') + 5), buffer, READ_BUFFER_SIZE, 0); @@ -801,7 +801,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs__sequence_virtualization) buffer = new char[READ_BUFFER_SIZE + 1]; flush_buffer(buffer, READ_BUFFER_SIZE, '\0'); - written = fs.view_sequence_region_size(cache_p0, (strchr(arg, '/') + 5)); + written = fs.view_sequence_region_size((strchr(arg, '/') + 5)); BOOST_CHECK_EQUAL(written, 2); @@ -825,7 +825,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs__sequence_virtualization) buffer = new char[READ_BUFFER_SIZE + 1]; flush_buffer(buffer, READ_BUFFER_SIZE, '\0'); - written = fs.view_sequence_region_size(cache_p0, (strchr(arg, '/') + 5)); + written = fs.view_sequence_region_size((strchr(arg, '/') + 5)); BOOST_CHECK_EQUAL(written, 15); @@ -836,7 +836,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs__sequence_virtualization) BOOST_CHECK_EQUAL(buffer[2], 'T'); BOOST_CHECK_EQUAL(buffer[3], 'G'); - written = fs.view_sequence_region_size(cache_p0, (strchr(arg, '/') + 5)); + written = fs.view_sequence_region_size((strchr(arg, '/') + 5)); BOOST_CHECK_EQUAL(written, 15); written = fs.view_sequence_region(cache_p0, (strchr(arg, '/') + 5), buffer, 4, 4); // small buffer size @@ -846,7 +846,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs__sequence_virtualization) BOOST_CHECK_EQUAL(buffer[2], 'T'); BOOST_CHECK_EQUAL(buffer[3], 'G'); - written = fs.view_sequence_region_size(cache_p0, (strchr(arg, '/') + 5)); + written = fs.view_sequence_region_size((strchr(arg, '/') + 5)); BOOST_CHECK_EQUAL(written, 15); @@ -857,7 +857,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs__sequence_virtualization) 
BOOST_CHECK_EQUAL(buffer[2], 'a'); BOOST_CHECK_EQUAL(buffer[3], 'a'); - written = fs.view_sequence_region_size(cache_p0, (strchr(arg, '/') + 5)); + written = fs.view_sequence_region_size((strchr(arg, '/') + 5)); BOOST_CHECK_EQUAL(written, 15); written = fs.view_sequence_region(cache_p0, (strchr(arg, '/') + 5), buffer, 4, 12); // small buffer size @@ -866,7 +866,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs__sequence_virtualization) BOOST_CHECK_EQUAL(buffer[1], 'c'); BOOST_CHECK_EQUAL(buffer[2], 'c'); - written = fs.view_sequence_region_size(cache_p0, (strchr(arg, '/') + 5)); + written = fs.view_sequence_region_size((strchr(arg, '/') + 5)); BOOST_CHECK_EQUAL(written, 15); delete[] buffer; @@ -883,7 +883,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs__sequence_virtualization) buffer = new char[READ_BUFFER_SIZE + 1]; flush_buffer(buffer, READ_BUFFER_SIZE, '\0'); - written = fs.view_sequence_region_size(cache_p0, (strchr(arg, '/') + 5)); + written = fs.view_sequence_region_size((strchr(arg, '/') + 5)); BOOST_CHECK_EQUAL(written, 4); @@ -907,7 +907,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs__sequence_virtualization) flush_buffer(buffer, READ_BUFFER_SIZE, '\0'); - written = fs.view_sequence_region_size(cache_p0, (strchr(arg, '/') + 5)); + written = fs.view_sequence_region_size((strchr(arg, '/') + 5)); BOOST_CHECK_EQUAL(written, 0); written = fs.view_sequence_region(cache_p0, (strchr(arg, '/') + 5), buffer, READ_BUFFER_SIZE, 0); // small buffer size From de9723d5c6b22da842a5199c05531a023471189a Mon Sep 17 00:00:00 2001 From: yhoogstrate Date: Fri, 18 Feb 2022 13:41:42 +0100 Subject: [PATCH 15/65] typo --- include/fourbit_byte.hpp | 2 +- src/fourbit_byte.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/fourbit_byte.hpp b/include/fourbit_byte.hpp index 97ca88c7..6cbb6bac 100644 --- a/include/fourbit_byte.hpp +++ b/include/fourbit_byte.hpp @@ -16,7 +16,7 @@ class fourbit_byte : public xbit_byte_encoder static const char xbit_byte_encoder::n_fill_masked = '-'; 
static const unsigned char xbit_byte_encoder::bits_per_nucleotide = 4; - static const char fourbit_alhpabet[17]; + //static const char fourbit_alphabet[17]; static char encode_hash[256][3]; static const char nucleotides_per_byte = 8 / fourbit_byte::bits_per_nucleotide ; // this is about compressed data diff --git a/src/fourbit_byte.cpp b/src/fourbit_byte.cpp index 1cce4422..05568e9a 100644 --- a/src/fourbit_byte.cpp +++ b/src/fourbit_byte.cpp @@ -20,7 +20,7 @@ binary: IUPEC 11111111 NN */ -const char fourbit_byte::fourbit_alhpabet[17] = "ACGTURYKMSWBDHVN"; +//const char fourbit_byte::fourbit_alphabet[17] = "ACGTURYKMSWBDHVN"; char fourbit_byte::encode_hash[256][3] = {"AA", "AC", "AG", "AT", "AU", "AR", "AY", "AK", "AM", "AS", "AW", "AB", "AD", "AH", "AV", "AN", "CA", "CC", "CG", "CT", "CU", "CR", "CY", "CK", "CM", "CS", "CW", "CB", "CD", "CH", "CV", "CN", "GA", "GC", "GG", "GT", "GU", "GR", "GY", "GK", "GM", "GS", "GW", "GB", "GD", "GH", "GV", "GN", "TA", "TC", "TG", "TT", "TU", "TR", "TY", "TK", "TM", "TS", "TW", "TB", "TD", "TH", "TV", "TN", "UA", "UC", "UG", "UT", "UU", "UR", "UY", "UK", "UM", "US", "UW", "UB", "UD", "UH", "UV", "UN", "RA", "RC", "RG", "RT", "RU", "RR", "RY", "RK", "RM", "RS", "RW", "RB", "RD", "RH", "RV", "RN", "YA", "YC", "YG", "YT", "YU", "YR", "YY", "YK", "YM", "YS", "YW", "YB", "YD", "YH", "YV", "YN", "KA", "KC", "KG", "KT", "KU", "KR", "KY", "KK", "KM", "KS", "KW", "KB", "KD", "KH", "KV", "KN", "MA", "MC", "MG", "MT", "MU", "MR", "MY", "MK", "MM", "MS", "MW", "MB", "MD", "MH", "MV", "MN", "SA", "SC", "SG", "ST", "SU", "SR", "SY", "SK", "SM", "SS", "SW", "SB", "SD", "SH", "SV", "SN", "WA", "WC", "WG", "WT", "WU", "WR", "WY", "WK", "WM", "WS", "WW", "WB", "WD", "WH", "WV", "WN", "BA", "BC", "BG", "BT", "BU", "BR", "BY", "BK", "BM", "BS", "BW", "BB", "BD", "BH", "BV", "BN", "DA", "DC", "DG", "DT", "DU", "DR", "DY", "DK", "DM", "DS", "DW", "DB", "DD", "DH", "DV", "DN", "HA", "HC", "HG", "HT", "HU", "HR", "HY", "HK", "HM", "HS", "HW", "HB", 
"HD", "HH", "HV", "HN", "VA", "VC", "VG", "VT", "VU", "VR", "VY", "VK", "VM", "VS", "VW", "VB", "VD", "VH", "VV", "VN", "NA", "NC", "NG", "NT", "NU", "NR", "NY", "NK", "NM", "NS", "NW", "NB", "ND", "NH", "NV", "NN"}; From 8026c71ad47d154aa77a7131df6f2a15e8d8e601 Mon Sep 17 00:00:00 2001 From: yhoogstrate Date: Sat, 19 Feb 2022 10:52:56 +0100 Subject: [PATCH 16/65] minor OOP improvements --- include/chunked_reader.hpp | 30 ++++++++++++++++++------------ src/chunked_reader.cpp | 16 ++++++++++++---- src/fastafs.cpp | 2 +- 3 files changed, 31 insertions(+), 17 deletions(-) diff --git a/include/chunked_reader.hpp b/include/chunked_reader.hpp index 7079dc78..14ec64da 100644 --- a/include/chunked_reader.hpp +++ b/include/chunked_reader.hpp @@ -30,6 +30,21 @@ enum compression_type : signed char { class chunked_reader { +private: + std::ifstream *fh_flat; + ZSTD_seekable_decompress_init_data* fh_zstd; + + std::string filename; // try doing this with inode + + compression_type filetype; + + char buffer[READ_BUFFER_SIZE + 1]; + size_t buffer_i; + size_t buffer_n; + + off_t file_i; + + public: chunked_reader(char *); // filename chunked_reader(const char *); // filename @@ -37,23 +52,14 @@ class chunked_reader void init(); // generic tasks needed for init - std::string filename; // try doing this with inode - - std::ifstream *fh_flat; void update_flat_buffer(); - - ZSTD_seekable_decompress_init_data* fh_zstd; void update_zstd_buffer(); - compression_type filetype; - char buffer[READ_BUFFER_SIZE + 1]; - size_t buffer_i; - size_t buffer_n; - - off_t file_i; + void find_filetype(); - void set_filetype(); + void set_filetype(compression_type); + compression_type get_filetype() { return this->filetype ; }; size_t read(char *, size_t);// @deprecate size_t read(unsigned char *, size_t); diff --git a/src/chunked_reader.cpp b/src/chunked_reader.cpp index a93d83ac..519929a5 100644 --- a/src/chunked_reader.cpp +++ b/src/chunked_reader.cpp @@ -43,7 +43,7 @@ 
chunked_reader::~chunked_reader() void chunked_reader::init() { - this->set_filetype(); + this->find_filetype(); switch(this->filetype) { @@ -72,15 +72,23 @@ void chunked_reader::init() } } -void chunked_reader::set_filetype() +void chunked_reader::find_filetype() { if(is_zstd_file((const char*) this->filename.c_str())) { - this->filetype = zstd; + this->set_filetype(zstd); } else { - this->filetype = uncompressed; + this->set_filetype(uncompressed); } } +void chunked_reader::set_filetype(compression_type arg_filetype) +{ + this->filetype = arg_filetype; +} + + +void set_filtetype(compression_type &filetype_arg); + size_t chunked_reader::read(char *arg_buffer, size_t buffer_size) { diff --git a/src/fastafs.cpp b/src/fastafs.cpp index 7d5fdd0d..debb89ee 100644 --- a/src/fastafs.cpp +++ b/src/fastafs.cpp @@ -644,7 +644,7 @@ void fastafs::load(std::string afilename) chunked_reader fh_in = chunked_reader(afilename.c_str()); { - this->filetype = fh_in.filetype; + this->filetype = fh_in.get_filetype(); memblock = new char [20 + 1]; //sha1 is 20b // if a user can't compile this line, please replace it with C's From c8bddebda54b89dc79bffc740631afde81a83e30 Mon Sep 17 00:00:00 2001 From: yhoogstrate Date: Sat, 19 Feb 2022 11:06:35 +0100 Subject: [PATCH 17/65] comment --- include/chunked_reader.hpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/chunked_reader.hpp b/include/chunked_reader.hpp index 14ec64da..20acabf7 100644 --- a/include/chunked_reader.hpp +++ b/include/chunked_reader.hpp @@ -26,6 +26,9 @@ enum compression_type : signed char { }; +//@todo implement w/ state design pattern +//url: https://refactoring.guru/design-patterns/state + class chunked_reader From 9ea1306a487d84c39ab9109f151131f842d9c423 Mon Sep 17 00:00:00 2001 From: yhoogstrate Date: Sun, 20 Feb 2022 12:20:25 +0100 Subject: [PATCH 18/65] partial save commit --- include/chunked_reader.hpp | 44 ++++++++++++++++++++- src/chunked_reader.cpp | 32 +++++++++++++++ 
test/chunked_reader/test_chunked_reader.cpp | 41 ++++++++++++++++++- 3 files changed, 113 insertions(+), 4 deletions(-) diff --git a/include/chunked_reader.hpp b/include/chunked_reader.hpp index 20acabf7..2dbc9a56 100644 --- a/include/chunked_reader.hpp +++ b/include/chunked_reader.hpp @@ -19,7 +19,7 @@ #include "zstd_seekable_utils.hpp" -enum compression_type : signed char { +enum compression_type : signed char { // dit is State undefined = -1, uncompressed = 0, zstd = 1 @@ -31,7 +31,7 @@ enum compression_type : signed char { -class chunked_reader +class chunked_reader // dit is Context { private: std::ifstream *fh_flat; @@ -75,5 +75,45 @@ class chunked_reader +class State +{ +public: + public: + virtual ~State() {}; + void set_context();// + + virtual void update_buffer() = 0; + +}; // comrpession type + +class ContextUncompressed : public State +{ +private: + uint i; // implementation specific integer +public: + void update_buffer() override; +}; + +class ContextZstdSeekable : public State +{ +public: + void update_buffer() override; +}; + +class Context // master chunked_reader +{ +protected: + State *state_; + +public: + void TransitionTo(State *state); + Context : state_(nullptr) (const char * arg_filename); + + static State* find_state(const char *arg_filename); +}; + + + + #endif diff --git a/src/chunked_reader.cpp b/src/chunked_reader.cpp index 519929a5..c32c877a 100644 --- a/src/chunked_reader.cpp +++ b/src/chunked_reader.cpp @@ -252,3 +252,35 @@ size_t chunked_reader::tell() return this->file_i - this->buffer_n + this->buffer_i; } + + + + + + + + +Context::Context(const char * arg_filename) +{ + printf("Constructor alive\n"); + + State *state = Context::find_state(arg_filename); +} + +State * Context::find_state(const char * arg_filename) +{ + if(is_zstd_file(arg_filename)) { + return new ContextZstdSeekable; + } else { + return new ContextUncompressed; + } +} + + + +void ContextUncompressed::update_buffer() { + printf("hello Uncompr\n"); +} +void 
ContextZstdSeekable::update_buffer() { + printf("hello ZstdSeek\n"); +} diff --git a/test/chunked_reader/test_chunked_reader.cpp b/test/chunked_reader/test_chunked_reader.cpp index 6a0a7e97..b37cffb2 100644 --- a/test/chunked_reader/test_chunked_reader.cpp +++ b/test/chunked_reader/test_chunked_reader.cpp @@ -30,7 +30,7 @@ BOOST_AUTO_TEST_SUITE(Testing) -BOOST_AUTO_TEST_CASE(test_chunked_reading_small_file) +BOOST_AUTO_TEST_CASE(test_chunked_reader__small_file) { std::string test_name = "test"; std::string fasta_file = "test/data/" + test_name + ".fa"; @@ -140,7 +140,7 @@ BOOST_AUTO_TEST_CASE(test_chunked_reading_small_file) } -BOOST_AUTO_TEST_CASE(test_chunked_reading_large_file) +BOOST_AUTO_TEST_CASE(test_chunked_reader__large_file) { // this file needs two buffers as its size is 1593 @@ -301,6 +301,43 @@ BOOST_AUTO_TEST_CASE(test_chunked_reading_large_file) } +BOOST_AUTO_TEST_CASE(test_chunked_reader__new_style) +{ + // this file needs two buffers as its size is 1593 + + std::string test_name = "test_007"; + std::string fasta_file = "test/data/" + test_name + ".fa"; + std::string fastafs_file = "tmp/" + test_name + ".fastafs"; + std::string fastafs_file_zstd = "tmp/" + test_name + ".fastafs.zst"; + + fasta_to_fastafs(fasta_file, fastafs_file, false); + ZSTD_seekable_compressFile_orDie((const char*) fastafs_file.c_str(), + (const char*) fastafs_file_zstd.c_str(), + (int) ZSTD_COMPRESSION_QUALIITY, + (unsigned) ZSTD_SEEKABLE_FRAME_SIZE); + + + char buffer[READ_BUFFER_SIZE + 1]; + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + std::string std_buffer; + size_t written; + + std::string reference1 = 
"\x0f\x0a\x46\x53\x00\x00\x00\x00\x80\x00\x00\x00\x06\x20\x00\x00\x17\xd7\xf4\xbd\xdd\x5d\x39\xcc\xce\x7e\xe8\x6e\x9d\x92\x70\x2d\x96\x68\x9f\xba\x83\xe1\x99\x2d\x9f\xe4\xed\x65\x3f\x09\x88\x5d\x28\x5c\xc0\x99\x36\x80\x87\xdc\x02\xc0\xe5\x5a\xef\xae\x56\x95\x59\x91\xb6\xde\x35\xf4\x1c\x60\x1e\x30\xd1\x77\x1c\x70\x2d\xda\xed\xc5\xfc\x58\x8a\x28\x94\x2b\x4f\x96\x97\x18\xa0\x65\x22\x48\xa6\x06\x1b\x65\x7f\xf4\x82\x8f\xe3\x05\xde\x00\x70\xb7\xb5\xa4\x1e\xc3\x43\xe9\x49\x92\x8b\x47\xa6\xdd\x97\xd4\x93\x4d\xb4\xd0\x76\xc7\x4d\xeb\x71\x48\x77\x43\x91\xcd\xe5\x8f\x8d\xa2\xcb\x28\x53\xcf\x82\xa4\xd5\x85\x78\xae\x37\xd9\x19\x13\x54\x52\x0c\x7d\xcb\x2a\xfd\x1b\x38\x66\xaa\xd3\x23\xe6\xf7\x20\xd5\x0a\xf1\x4b\x59\xe6\x0b\xbe\x42\xa9\x5e\x7d\xce\xec\x73\xd9\x8b\xc6\x4b\x35\xe4\x69\xbc\x10\x35\x8a\x0e\x09\x2b\xf1\x9f\x38\x15\x57\x21\x08\xe1\xa6\x6e\xf1\x8c\x52\x08\x1b\x85\x50\xe0\x1e\x01\x35\x3a\x0a\x72\x1b\xb3\xda\xfd\x78\x36\x10\xb7\x1a\x2e\x93\xd4\x63\xab\x0b\x98\xfb\x4b\x97\x47\x7f\x61\x0f\x36\x7f\xfe\x02\x36\x2e\x30\xa5\xdb\x8f\xde\xd0\xc0\xc6\x9c\x3a\x7b\x71\x24\x1e\xc3\x04\xac\x31\x7a\xf5\xf3\x33\x26\x99\xa6\x4f\x43\x6c\x46\x5c\x4d\xf5\xb8\x43\x1d\xd7\x73\x3e\xe9\xb6\x3b\xdf\xff\xf6\xf3\x2a\x34\x3f\x39\x60\x4b\xed\xde\xf4\x2f\x5d\xe7\xab\xfe\xa1\x4d\x11\x9c\xcc\x41\xf8\x3c\xdd\x18\xea\xea\x45\x3e\xa5\x0b\xb5\x7b\x38\x5e\x26\x72\xdd\x24\x51\x48\xcf\x79\xa7\xd9\x06\x2e\xe8\xfb\x5d\x3d\x4a\x81\x0d\x15\x48\xd0\x84\x15\x0f\x15\x5b\xc3\x9d\x48\xc4\x9e\x2f\x45\xd4\x1c\x24\xc4\x90\x60\xe6\xa1\x19\x6f\x2d\x3a\xf9\x52\x0d\x06\x93\x21\xb4\xc2\x43\xd7\xce\x5b\xaa\x42\x20\x35\x6c\x45\xa2\xea\xd8\xe0\xc7\x90\xe3\x4a\x3e\xb1\x65\xaf\x5c\xe3\x23\x58\x65\x88\x92\x0b\x98\xc4\x3f\x7b\xb4\x42\x6c\x77\x7f\xf3\x51\x1a\x17\x89\x1f\x03\x66\x95\xbb\x83\x3f\xfb\xd1\x8c\x46\x40\x7f\xd1\xff\x7a\xbb\xb2\xcc\xaa\xc9\xfe\xbe\x7a\xff\x5b\xf7\x17\xe0\x4d\xca\x6b\xf2\xef\x0f\x0c\x48\x90\x5c\x6d\xa4\x53\xf4\xfb\xe3\xfe\x38\x61\xdb\x32\xe6\x6e\x35\x86\xad\xad\x33\x3b\x7f\x92\x7f\xf9\x4c\xbd\x92\xbe\x41\x4f\x2
3\x37\xa2\x6e\xd9\x7d\x82\x47\xa4\x8f\x77\x51\xdb\x2f\xd6\xda\xcb\x1d\x7b\x2b\xe3\x29\x6f\x03\xad\xce\x05\xa7\xab\x34\x52\xb8\x94\xd3\x08\x5b\x9f\x0d\xec\x27\x09\xce\xb5\x82\x89\x43\xe0\xc3\xc3\x7f\xad\xeb\x30\x0a\x5c\xa8\x88\xc8\x38\x02\x18\x4d\xda\x80\x02\xf5\xb0\x0b\xbf\x3b\xbc\x11\x6b\xe7\xfd\x4b\x4a\xe9\x48\x31\x9f\x3a\x83\x80\x7b\x21\x73\xf8\x99\x43\x1b\xd6\x1a\xb6\xce\xe4\xff\x0e\x58\x33\x86\xd0\x09\x70\x14\x63\xc6\x45\x8f\x2a\x5f\xc8\xb2\x82\xdc\x4f\x99\x81\xa8\x87\xe4\xbf\xc5\xfe\x35\x81\x73\x63\x21\xf1\x82\xdb\x73\xfe\xe2\x1b\x5f\xff\x07\x8b\xb4\xef\xb6\x6a\x92\x9c\xcf\x6d\x09\xb1\xc1\x78\xa4\x56\x37\xe4\x6a\xf9\x01\x1e\x8c\x51\x14\x10\x34\xbd\xb0\x4f\xc6\xcb\xd6\xf4\xee\xed\x7c\x23\xa2\x80\xde\x5d\x76\x9d\x09\xd8\x1d\x45\x21\xc1\xad\xe9\x74\xf2\x61\xd4\x0b\xc7\x0d\x6a\xab\x25\x7c\x19\xa3\xf0\x88\x87\x7b\xba\xf0\x37\x3f\x59\x8f\x7f\x8e\x25\xbb\x80\x70\xf2\xe3\xf5\x0a\xa5\xb5\x2c\x43\x6f\xf1\x7b\xd3\x48\x86\x9a\xa2\xb1\x42\x89\xf3\x00\x0e\x9d\x99\xca\x5e\xb0\x2a\xf7\x46\xe6\xfb\xb9\x22\xc9\x14\xb9\x75\x95\x82\x87\x0d\x9a\x54\x80\xf6\xbc\x1f\xd9\xcb\x09\x0c\x4b\x5e\x38\xa1\x10\xaa\x32\xb1\xfa\xcc\xba\x37\x37\x01\x6d\x7f\xf1\x9d\x49\x35\x6a\x5b\xec\xec\xfb\x6a\x46\xca\x41\x03\x35\xfb\x56\xef\x5b\xe2\x44\xa0\x9e\xf8\x99\xde\x92\x17\x12\x98\x5e\x11\xe0\x73\x94\x23\xc9\x81\x61\xcc\x8a\xb4\x72\x5d\x6e\x1b\xfb\xa4\x3c\x79\x06\x12\xd3\x00\x47\xa7\x8e\x8c\x42\x9d\xa4\xfd\x34\xcd\xf0\x94\xdc\x3c\x84\xe3\xf7\xfc\x16\xd8\x0d\x4a\x9d\x05\xe1\xff\x1b\x47\xf1\xdc\xdf\xa4\x86\x09\xc1\xfe\xde\x45\xe4\x43\xfd\x0d\x05\xf4\x3f\xb5\x2e\xe7\x48\xde\xc8\x2b\x8a\x5f\xee\x28\x66\x09\xb4\x65\x12\x77\x23\x6a\xe2\x80\xa4\xc2\xa5\x1e\xbe\xd9\x8e\xae\x56\x4d\x56\xfe\xed\xe8\x0e\x39\xab\xba\x68\xfd\x39\x2c\x22\x30\x80\x31\xfe\x34\x46\x7d\xea\x3c\x8e\x5b\x87\xef\xac\x2d\xe3\x80\x19\x5a\xd8\xba\x63\xd5\xb4\x59\xc0\x38\xff\xc5\xd8\x00\x75\x8e\x31\x7c\x1f\x90\x98\xdc\x4a\x9c\x67\x84\x12\x87\xb2\x06\xcc\x5c\x41\xc4\xa2\x22\x88\x2d\xf5\x43\xdc\x5f\xe8\x71\xa0\x0f\xbd\xa8\x33\x6f\x83\xbf\xc0\x3
a\xfd\xa7\xf9\x8a\x93\x12\x94\x0a\x9e\x39\x68\x60\xc2\xfe\x0a\x2c\x13\xb6\x25\x5a\x85\x62\x1c\x5b"s;// xxd -p + std::string reference2 = "\x44\x2c\x05\x5b\xe6\x92\x56\x6b\x2f\xf6\x4f\xfb\xdc\x46\x9c\xe2\xbd\xac\xc0\x0d\x53\x44\x4d\x29\xd3\xe3\x61\x06\x77\xfb\x0c\x1b\xfa\x05\x17\x3b\x32\xc8\x6c\xd3\x0e\xa8\x18\xde\x64\xfb\x8a\xb8\x84\xf6\x3f\x17\xc4\x1f\xea\x8c\xea\xd5\x42\xc1\xb3\xdb\x68\x90\x8a\x24\x2f\x0c\xc5\x9b\xb6\xd6\x16\x5d\x3d\x38\xf1\xf6\x80\xf2\x56\x47\xf3\x95\x64\x7e\x50\x14\x02\x73\xa9\x0a\x04\x01\xcc\xf3\x1b\x3c\x9a\xfd\x98\x86\xdf\x54\xe6\x36\x50\xe9\xc0\x46\xd7\xae\x54\xd1\xe4\xaf\x98\xc3\xa6\xee\x44\xce\x8c\x16\xdf\x33\x87\x0b\xca\x12\x91\xac\xa4\xbe\x4e\xdb\xb2\x32\x21\x21\x16\xdb\x0c\x5f\xe3\x33\xbd\xa9\x8a\x88\xed\x3e\x65\x46\x4d\x8b\x16\xf0\x73\xe7\x76\x3d\x42\xb5\xe1\xba\x14\xe8\xd9\x99\x4f\x67\xc2\x20\x0d\x41\x07\x27\x61\x3a\x28\x49\x6f\x73\xdb\x44\xdb\xe2\x5e\x54\x4e\x1c\xe0\xd4\x66\x1e\xfe\x0c\x96\x52\xb3\x79\x00\x9d\x87\xed\xee\xc6\x82\x5e\xdc\x8f\xcd\xc8\xaa\x1c\x44\x76\x22\x14\x99\xef\x56\x73\x0e\x93\x14\x77\xa3\xa4\x52\xa7\xad\x55\x6c\xe2\x1a\x6a\x57\xd1\xb8\x4a\x8f\x3a\xa9\xcf\xab\x20\x25\xc8\xa8\x13\x30\x3c\x78\xbd\x3e\x9d\x73\x8f\xd9\x10\x9c\x15\xa8\x8a\x58\x70\x34\x38\xbb\xff\x26\x6d\x42\xcd\x2f\x8f\x7c\x20\x39\xa5\x37\x70\xf1\x1f\x65\x8a\xc5\xa3\x4f\x02\x57\x35\x17\x1b\x91\xa2\xa6\xd4\x67\x1c\x54\xde\xb4\xaf\x53\x99\x92\x23\xc1\x3d\xcc\x62\x9c\x21\xd9\xb5\xde\x5f\xd6\x1e\xa5\x4a\x45\x7e\x10\x74\xc4\x9e\x7f\x3b\xdd\xf6\x6c\xb6\xf2\xc9\xb6\xbe\x01\x45\x2e\x4a\x3b\xaf\x41\x05\x91\x38\x68\x35\x36\x0e\x1a\xc7\xc9\x52\x6d\xc1\x9c\x9e\x50\x29\x7b\x3e\xe0\x39\x67\x32\xe8\xae\xaa\xac\x0c\xbb\x18\x4c\x11\x3b\x58\xc8\x80\x88\xf1\x6d\x7a\x3d\x36\xd0\x8e\xc1\xb1\xf8\xbb\xa9\xd1\xd6\x8f\x07\x6b\x12\x1a\x5b\xf1\xea\xed\x94\x1b\xe1\x1b\xe7\x0e\x75\x3d\x4e\xcf\x5b\x91\x2e\x78\x55\xd8\x8d\x1f\x1b\x09\x60\x38\xd2\xb8\xaa\x1f\xb5\x9d\x2c\xd4\x5c\x44\x78\x1f\x88\x4c\xaf\xa6\x2c\xeb\xca\x00\x51\xbe\xc9\x2e\x60\xaf\x0d\xb4\x02\xb3\x47\x0a\x3f\x4b\xbc\xc4\x
a4\xff\xbb\xb3\x0e\x4f\xb3\xf0\x71\x3a\x84\x9a\x3d\x36\x33\x25\xeb\x2f\x76\x66\x5e\xc3\xd0\x66\xfc\xd4\x10\x3b\x78\x15\x61\x2d\xfc\xe6\x05\x7e\xda\x86\x43\x15\xb9\x78\xc2\x8b\x98\x42\x3e\x56\x42\x69\xba\xa2\xf3\x1e\xec\x00\x00\x00\x00\x21\x83\x67\xa8\x14\xed\xdc\x51\xeb\x96\x93\x98\x74\x4d\x13\x7c\x00\x00\x00\x00\x00\x00\x00\x01\x10\x00\x09\x6c\x65\x6e\x2d\x6c\x69\x6d\x69\x74\x00\x00\x00\x0e\x00\x98\x32\x91\x09"s;// xxd -p + std::string reference3 = "\x00\x00\x00\x00\x80\x00\x00\x00\x06\x20\x00\x00\x17\xd7\xf4\xbd\xdd\x5d\x39\xcc\xce\x7e\xe8\x6e\x9d\x92\x70\x2d\x96\x68\x9f\xba\x83\xe1\x99\x2d\x9f\xe4\xed\x65\x3f\x09\x88\x5d\x28\x5c\xc0\x99\x36\x80\x87\xdc\x02\xc0\xe5\x5a\xef\xae\x56\x95\x59\x91\xb6\xde\x35\xf4\x1c\x60\x1e\x30\xd1\x77\x1c\x70\x2d\xda\xed\xc5\xfc\x58\x8a\x28\x94\x2b\x4f\x96\x97\x18\xa0\x65\x22\x48\xa6\x06\x1b\x65\x7f\xf4\x82\x8f\xe3\x05\xde\x00\x70\xb7\xb5\xa4\x1e\xc3\x43\xe9\x49\x92\x8b\x47\xa6\xdd\x97\xd4\x93\x4d\xb4\xd0\x76\xc7\x4d\xeb\x71\x48\x77\x43\x91\xcd\xe5\x8f\x8d\xa2\xcb\x28\x53\xcf\x82\xa4\xd5\x85\x78\xae\x37\xd9\x19\x13\x54\x52\x0c\x7d\xcb\x2a\xfd\x1b\x38\x66\xaa\xd3\x23\xe6\xf7\x20\xd5\x0a\xf1\x4b\x59\xe6\x0b\xbe\x42\xa9\x5e\x7d\xce\xec\x73\xd9\x8b\xc6\x4b\x35\xe4\x69\xbc\x10\x35\x8a\x0e\x09\x2b\xf1\x9f\x38\x15\x57\x21\x08\xe1\xa6\x6e\xf1\x8c\x52\x08\x1b\x85\x50\xe0\x1e\x01\x35\x3a\x0a\x72\x1b\xb3\xda\xfd\x78\x36\x10\xb7\x1a\x2e\x93\xd4\x63\xab\x0b\x98\xfb\x4b\x97\x47\x7f\x61\x0f\x36\x7f\xfe\x02\x36\x2e\x30\xa5\xdb\x8f\xde\xd0\xc0\xc6\x9c\x3a\x7b\x71\x24\x1e\xc3\x04\xac\x31\x7a\xf5\xf3\x33\x26\x99\xa6\x4f\x43\x6c\x46\x5c\x4d\xf5\xb8\x43\x1d\xd7\x73\x3e\xe9\xb6\x3b\xdf\xff\xf6\xf3\x2a\x34\x3f\x39\x60\x4b\xed\xde\xf4\x2f\x5d\xe7\xab\xfe\xa1\x4d\x11\x9c\xcc\x41\xf8\x3c\xdd\x18\xea\xea\x45\x3e\xa5\x0b\xb5\x7b\x38\x5e\x26\x72\xdd\x24\x51\x48\xcf\x79\xa7\xd9\x06\x2e\xe8\xfb\x5d\x3d\x4a\x81\x0d\x15\x48\xd0\x84\x15\x0f\x15\x5b\xc3\x9d\x48\xc4\x9e\x2f\x45\xd4\x1c\x24\xc4\x90\x60\xe6\xa1\x19\x6f\x2d\x3a\xf9\x52\x0d\x06\x93\x21\xb4\xc2\x43\xd7\xce\
x5b\xaa\x42\x20\x35\x6c\x45\xa2\xea\xd8\xe0\xc7\x90\xe3\x4a\x3e\xb1\x65\xaf\x5c\xe3\x23\x58\x65\x88\x92\x0b\x98\xc4\x3f\x7b\xb4\x42\x6c\x77\x7f\xf3\x51\x1a\x17\x89\x1f\x03\x66\x95\xbb\x83\x3f\xfb\xd1\x8c\x46\x40\x7f\xd1\xff\x7a\xbb\xb2\xcc\xaa\xc9\xfe\xbe\x7a\xff\x5b\xf7\x17\xe0\x4d\xca\x6b\xf2\xef\x0f\x0c\x48\x90\x5c\x6d\xa4\x53\xf4\xfb\xe3\xfe\x38\x61\xdb\x32\xe6\x6e\x35\x86\xad\xad\x33\x3b\x7f\x92\x7f\xf9\x4c\xbd\x92\xbe\x41\x4f\x23\x37\xa2\x6e\xd9\x7d\x82\x47\xa4\x8f\x77\x51\xdb\x2f\xd6\xda\xcb\x1d\x7b\x2b\xe3\x29\x6f\x03\xad\xce\x05\xa7\xab\x34\x52\xb8\x94\xd3\x08\x5b\x9f\x0d\xec\x27\x09\xce\xb5\x82\x89\x43\xe0\xc3\xc3\x7f\xad\xeb\x30\x0a\x5c\xa8\x88\xc8\x38\x02\x18\x4d\xda\x80\x02\xf5\xb0\x0b\xbf\x3b\xbc\x11\x6b\xe7\xfd\x4b\x4a\xe9\x48\x31\x9f\x3a\x83\x80\x7b\x21\x73\xf8\x99\x43\x1b\xd6\x1a\xb6\xce\xe4\xff\x0e\x58\x33\x86\xd0\x09\x70\x14\x63\xc6\x45\x8f\x2a\x5f\xc8\xb2\x82\xdc\x4f\x99\x81\xa8\x87\xe4\xbf\xc5\xfe\x35\x81\x73\x63\x21\xf1\x82\xdb\x73\xfe\xe2\x1b\x5f\xff\x07\x8b\xb4\xef\xb6\x6a\x92\x9c\xcf\x6d\x09\xb1\xc1\x78\xa4\x56\x37\xe4\x6a\xf9\x01\x1e\x8c\x51\x14\x10\x34\xbd\xb0\x4f\xc6\xcb\xd6\xf4\xee\xed\x7c\x23\xa2\x80\xde\x5d\x76\x9d\x09\xd8\x1d\x45\x21\xc1\xad\xe9\x74\xf2\x61\xd4\x0b\xc7\x0d\x6a\xab\x25\x7c\x19\xa3\xf0\x88\x87\x7b\xba\xf0\x37\x3f\x59\x8f\x7f\x8e\x25\xbb\x80\x70\xf2\xe3\xf5\x0a\xa5\xb5\x2c\x43\x6f\xf1\x7b\xd3\x48\x86\x9a\xa2\xb1\x42\x89\xf3\x00\x0e\x9d\x99\xca\x5e\xb0\x2a\xf7\x46\xe6\xfb\xb9\x22\xc9\x14\xb9\x75\x95\x82\x87\x0d\x9a\x54\x80\xf6\xbc\x1f\xd9\xcb\x09\x0c\x4b\x5e\x38\xa1\x10\xaa\x32\xb1\xfa\xcc\xba\x37\x37\x01\x6d\x7f\xf1\x9d\x49\x35\x6a\x5b\xec\xec\xfb\x6a\x46\xca\x41\x03\x35\xfb\x56\xef\x5b\xe2\x44\xa0\x9e\xf8\x99\xde\x92\x17\x12\x98\x5e\x11\xe0\x73\x94\x23\xc9\x81\x61\xcc\x8a\xb4\x72\x5d\x6e\x1b\xfb\xa4\x3c\x79\x06\x12\xd3\x00\x47\xa7\x8e\x8c\x42\x9d\xa4\xfd\x34\xcd\xf0\x94\xdc\x3c\x84\xe3\xf7\xfc\x16\xd8\x0d\x4a\x9d\x05\xe1\xff\x1b\x47\xf1\xdc\xdf\xa4\x86\x09\xc1\xfe\xde\x45\xe4\x43\xfd\x0d\x05\xf4\x3f\xb5\x2e\xe7\x48\xde\
xc8\x2b\x8a\x5f\xee\x28\x66\x09\xb4\x65\x12\x77\x23\x6a\xe2\x80\xa4\xc2\xa5\x1e\xbe\xd9\x8e\xae\x56\x4d\x56\xfe\xed\xe8\x0e\x39\xab\xba\x68\xfd\x39\x2c\x22\x30\x80\x31\xfe\x34\x46\x7d\xea\x3c\x8e\x5b\x87\xef\xac\x2d\xe3\x80\x19\x5a\xd8\xba\x63\xd5\xb4\x59\xc0\x38\xff\xc5\xd8\x00\x75\x8e\x31\x7c\x1f\x90\x98\xdc\x4a\x9c\x67\x84\x12\x87\xb2\x06\xcc\x5c\x41\xc4\xa2\x22\x88\x2d\xf5\x43\xdc\x5f\xe8\x71\xa0\x0f\xbd\xa8\x33\x6f\x83\xbf\xc0\x3a\xfd\xa7\xf9\x8a\x93\x12\x94\x0a\x9e\x39\x68\x60\xc2\xfe\x0a\x2c\x13\xb6\x25\x5a\x85\x62\x1c\x5b\x44\x2c\x05\x5b"s;// xxd -p + std::string reference4 = "\xe6\x92\x56\x6b\x2f\xf6\x4f\xfb\xdc\x46\x9c\xe2\xbd\xac\xc0\x0d\x53\x44\x4d\x29\xd3\xe3\x61\x06\x77\xfb\x0c\x1b\xfa\x05\x17\x3b\x32\xc8\x6c\xd3\x0e\xa8\x18\xde\x64\xfb\x8a\xb8\x84\xf6\x3f\x17\xc4\x1f\xea\x8c\xea\xd5\x42\xc1\xb3\xdb\x68\x90\x8a\x24\x2f\x0c\xc5\x9b\xb6\xd6\x16\x5d\x3d\x38\xf1\xf6\x80\xf2\x56\x47\xf3\x95\x64\x7e\x50\x14\x02\x73\xa9\x0a\x04\x01\xcc\xf3\x1b\x3c\x9a\xfd\x98\x86\xdf\x54\xe6\x36\x50\xe9\xc0\x46\xd7\xae\x54\xd1\xe4\xaf\x98\xc3\xa6\xee\x44\xce\x8c\x16\xdf\x33\x87\x0b\xca\x12\x91\xac\xa4\xbe\x4e\xdb\xb2\x32\x21\x21\x16\xdb\x0c\x5f\xe3\x33\xbd\xa9\x8a\x88\xed\x3e\x65\x46\x4d\x8b\x16\xf0\x73\xe7\x76\x3d\x42\xb5\xe1\xba\x14\xe8\xd9\x99\x4f\x67\xc2\x20\x0d\x41\x07\x27\x61\x3a\x28\x49\x6f\x73\xdb\x44\xdb\xe2\x5e\x54\x4e\x1c\xe0\xd4\x66\x1e\xfe\x0c\x96\x52\xb3\x79\x00\x9d\x87\xed\xee\xc6\x82\x5e\xdc\x8f\xcd\xc8\xaa\x1c\x44\x76\x22\x14\x99\xef\x56\x73\x0e\x93\x14\x77\xa3\xa4\x52\xa7\xad\x55\x6c\xe2\x1a\x6a\x57\xd1\xb8\x4a\x8f\x3a\xa9\xcf\xab\x20\x25\xc8\xa8\x13\x30\x3c\x78\xbd\x3e\x9d\x73\x8f\xd9\x10\x9c\x15\xa8\x8a\x58\x70\x34\x38\xbb\xff\x26\x6d\x42\xcd\x2f\x8f\x7c\x20\x39\xa5\x37\x70\xf1\x1f\x65\x8a\xc5\xa3\x4f\x02\x57\x35\x17\x1b\x91\xa2\xa6\xd4\x67\x1c\x54\xde\xb4\xaf\x53\x99\x92\x23\xc1\x3d\xcc\x62\x9c\x21\xd9\xb5\xde\x5f\xd6\x1e\xa5\x4a\x45\x7e\x10\x74\xc4\x9e\x7f\x3b\xdd\xf6\x6c\xb6\xf2\xc9\xb6\xbe\x01\x45\x2e\x4a\x3b\xaf\x41\x05\x91\x38\x68\x35\x36\x0e\x1a\xc7
\xc9\x52\x6d\xc1\x9c\x9e\x50\x29\x7b\x3e\xe0\x39\x67\x32\xe8\xae\xaa\xac\x0c\xbb\x18\x4c\x11\x3b\x58\xc8\x80\x88\xf1\x6d\x7a\x3d\x36\xd0\x8e\xc1\xb1\xf8\xbb\xa9\xd1\xd6\x8f\x07\x6b\x12\x1a\x5b\xf1\xea\xed\x94\x1b\xe1\x1b\xe7\x0e\x75\x3d\x4e\xcf\x5b\x91\x2e\x78\x55\xd8\x8d\x1f\x1b\x09\x60\x38\xd2\xb8\xaa\x1f\xb5\x9d\x2c\xd4\x5c\x44\x78\x1f\x88\x4c\xaf\xa6\x2c\xeb\xca\x00\x51\xbe\xc9\x2e\x60\xaf\x0d\xb4\x02\xb3\x47\x0a\x3f\x4b\xbc\xc4\xa4\xff\xbb\xb3\x0e\x4f\xb3\xf0\x71\x3a\x84\x9a\x3d\x36\x33\x25\xeb\x2f\x76\x66\x5e\xc3\xd0\x66\xfc\xd4\x10\x3b\x78\x15\x61\x2d\xfc\xe6\x05\x7e\xda\x86\x43\x15\xb9\x78\xc2\x8b\x98\x42\x3e\x56\x42\x69\xba\xa2\xf3\x1e\xec\x00\x00\x00\x00\x21\x83\x67\xa8\x14\xed\xdc\x51\xeb\x96\x93\x98\x74\x4d\x13\x7c\x00\x00\x00\x00\x00\x00\x00\x01\x10\x00\x09\x6c\x65\x6e\x2d\x6c\x69\x6d\x69\x74\x00\x00\x00\x0e\x00\x98\x32\x91\x09"s;// xxd -p + std::string reference5 = "\x00\x00\x00\x00"s;// xxd -p + std::string reference6 = "\x80\x00\x00\x00\x06\x20\x00\x00\x17\xd7\xf4\xbd\xdd\x5d\x39\xcc\xce\x7e\xe8\x6e\x9d\x92\x70\x2d\x96\x68\x9f\xba\x83\xe1\x99\x2d\x9f\xe4\xed\x65\x3f\x09\x88\x5d\x28\x5c\xc0\x99\x36\x80\x87\xdc\x02\xc0\xe5\x5a\xef\xae\x56\x95\x59\x91\xb6\xde\x35\xf4\x1c\x60\x1e\x30\xd1\x77\x1c\x70\x2d\xda\xed\xc5\xfc\x58\x8a\x28\x94\x2b\x4f\x96\x97\x18\xa0\x65\x22\x48\xa6\x06\x1b\x65\x7f\xf4\x82\x8f\xe3\x05\xde\x00\x70\xb7\xb5\xa4\x1e\xc3\x43\xe9\x49\x92\x8b\x47\xa6\xdd\x97\xd4\x93\x4d\xb4\xd0\x76\xc7\x4d\xeb\x71\x48\x77\x43\x91\xcd\xe5\x8f\x8d\xa2\xcb\x28\x53\xcf\x82\xa4\xd5\x85\x78\xae\x37\xd9\x19\x13\x54\x52\x0c\x7d\xcb\x2a\xfd\x1b\x38\x66\xaa\xd3\x23\xe6\xf7\x20\xd5\x0a\xf1\x4b\x59\xe6\x0b\xbe\x42\xa9\x5e\x7d\xce\xec\x73\xd9\x8b\xc6\x4b\x35\xe4\x69\xbc\x10\x35\x8a\x0e\x09\x2b\xf1\x9f\x38\x15\x57\x21\x08\xe1\xa6\x6e\xf1\x8c\x52\x08\x1b\x85\x50\xe0\x1e\x01\x35\x3a\x0a\x72\x1b\xb3\xda\xfd\x78\x36\x10\xb7\x1a\x2e\x93\xd4\x63\xab\x0b\x98\xfb\x4b\x97\x47\x7f\x61\x0f\x36\x7f\xfe\x02\x36\x2e\x30\xa5\xdb\x8f\xde\xd0\xc0\xc6\x9c\x3a\x7b\x71\x24\x1e\xc3\x04\x
ac\x31\x7a\xf5\xf3\x33\x26\x99\xa6\x4f\x43\x6c\x46\x5c\x4d\xf5\xb8\x43\x1d\xd7\x73\x3e\xe9\xb6\x3b\xdf\xff\xf6\xf3\x2a\x34\x3f\x39\x60\x4b\xed\xde\xf4\x2f\x5d\xe7\xab\xfe\xa1\x4d\x11\x9c\xcc\x41\xf8\x3c\xdd\x18\xea\xea\x45\x3e\xa5\x0b\xb5\x7b\x38\x5e\x26\x72\xdd\x24\x51\x48\xcf\x79\xa7\xd9\x06\x2e\xe8\xfb\x5d\x3d\x4a\x81\x0d\x15\x48\xd0\x84\x15\x0f\x15\x5b\xc3\x9d\x48\xc4\x9e\x2f\x45\xd4\x1c\x24\xc4\x90\x60\xe6\xa1\x19\x6f\x2d\x3a\xf9\x52\x0d\x06\x93\x21\xb4\xc2\x43\xd7\xce\x5b\xaa\x42\x20\x35\x6c\x45\xa2\xea\xd8\xe0\xc7\x90\xe3\x4a\x3e\xb1\x65\xaf\x5c\xe3\x23\x58\x65\x88\x92\x0b\x98\xc4\x3f\x7b\xb4\x42\x6c\x77\x7f\xf3\x51\x1a\x17\x89\x1f\x03\x66\x95\xbb\x83\x3f\xfb\xd1\x8c\x46\x40\x7f\xd1\xff\x7a\xbb\xb2\xcc\xaa\xc9\xfe\xbe\x7a\xff\x5b\xf7\x17\xe0\x4d\xca\x6b\xf2\xef\x0f\x0c\x48\x90\x5c\x6d\xa4\x53\xf4\xfb\xe3\xfe\x38\x61\xdb\x32\xe6\x6e\x35\x86\xad\xad\x33\x3b\x7f\x92\x7f\xf9\x4c\xbd\x92\xbe\x41\x4f\x23\x37\xa2\x6e\xd9\x7d\x82\x47\xa4\x8f\x77\x51\xdb\x2f\xd6\xda\xcb\x1d\x7b\x2b\xe3\x29\x6f\x03\xad\xce\x05\xa7\xab\x34\x52\xb8\x94\xd3\x08\x5b\x9f\x0d\xec\x27\x09\xce\xb5\x82\x89\x43\xe0\xc3\xc3\x7f\xad\xeb\x30\x0a\x5c\xa8\x88\xc8\x38\x02\x18\x4d\xda\x80\x02\xf5\xb0\x0b\xbf\x3b\xbc\x11\x6b\xe7\xfd\x4b\x4a\xe9\x48\x31\x9f\x3a\x83\x80\x7b\x21\x73\xf8\x99\x43\x1b\xd6\x1a\xb6\xce\xe4\xff\x0e\x58\x33\x86\xd0\x09\x70\x14\x63\xc6\x45\x8f\x2a\x5f\xc8\xb2\x82\xdc\x4f\x99\x81\xa8\x87\xe4\xbf\xc5\xfe\x35\x81\x73\x63\x21\xf1\x82\xdb\x73\xfe\xe2\x1b\x5f\xff\x07\x8b\xb4\xef\xb6\x6a\x92\x9c\xcf\x6d\x09\xb1\xc1\x78\xa4\x56\x37\xe4\x6a\xf9\x01\x1e\x8c\x51\x14\x10\x34\xbd\xb0\x4f\xc6\xcb\xd6\xf4\xee\xed\x7c\x23\xa2\x80\xde\x5d\x76\x9d\x09\xd8\x1d\x45\x21\xc1\xad\xe9\x74\xf2\x61\xd4\x0b\xc7\x0d\x6a\xab\x25\x7c\x19\xa3\xf0\x88\x87\x7b\xba\xf0\x37\x3f\x59\x8f\x7f\x8e\x25\xbb\x80\x70\xf2\xe3\xf5\x0a\xa5\xb5\x2c\x43\x6f\xf1\x7b\xd3\x48\x86\x9a\xa2\xb1\x42\x89\xf3\x00\x0e\x9d\x99\xca\x5e\xb0\x2a\xf7\x46\xe6\xfb\xb9\x22\xc9\x14\xb9\x75\x95\x82\x87\x0d\x9a\x54\x80\xf6\xbc\x1f\xd9\xcb\x09\x0c\x
4b\x5e\x38\xa1\x10\xaa\x32\xb1\xfa\xcc\xba\x37\x37\x01\x6d\x7f\xf1\x9d\x49\x35\x6a\x5b\xec\xec\xfb\x6a\x46\xca\x41\x03\x35\xfb\x56\xef\x5b\xe2\x44\xa0\x9e\xf8\x99\xde\x92\x17\x12\x98\x5e\x11\xe0\x73\x94\x23\xc9\x81\x61\xcc\x8a\xb4\x72\x5d\x6e\x1b\xfb\xa4\x3c\x79\x06\x12\xd3\x00\x47\xa7\x8e\x8c\x42\x9d\xa4\xfd\x34\xcd\xf0\x94\xdc\x3c\x84\xe3\xf7\xfc\x16\xd8\x0d\x4a\x9d\x05\xe1\xff\x1b\x47\xf1\xdc\xdf\xa4\x86\x09\xc1\xfe\xde\x45\xe4\x43\xfd\x0d\x05\xf4\x3f\xb5\x2e\xe7\x48\xde\xc8\x2b\x8a\x5f\xee\x28\x66\x09\xb4\x65\x12\x77\x23\x6a\xe2\x80\xa4\xc2\xa5\x1e\xbe\xd9\x8e\xae\x56\x4d\x56\xfe\xed\xe8\x0e\x39\xab\xba\x68\xfd\x39\x2c\x22\x30\x80\x31\xfe\x34\x46\x7d\xea\x3c\x8e\x5b\x87\xef\xac\x2d\xe3\x80\x19\x5a\xd8\xba\x63\xd5\xb4\x59\xc0\x38\xff\xc5\xd8\x00\x75\x8e\x31\x7c\x1f\x90\x98\xdc\x4a\x9c\x67\x84\x12\x87\xb2\x06\xcc\x5c\x41\xc4\xa2\x22\x88\x2d\xf5\x43\xdc\x5f\xe8\x71\xa0\x0f\xbd\xa8\x33\x6f\x83\xbf\xc0\x3a\xfd\xa7\xf9\x8a\x93\x12\x94\x0a\x9e\x39\x68\x60\xc2\xfe\x0a\x2c\x13\xb6\x25\x5a\x85\x62\x1c\x5b\x44\x2c\x05\x5b\xe6\x92\x56\x6b"s;// xxd -p + + + { + ////chunked_reader r_flat = chunked_reader(fastafs_file.c_str()); + + Context c = Context(fastafs_file.c_str()); + //c.findState(); // let it self determine to choose Zstd or Flat + } +} + BOOST_AUTO_TEST_SUITE_END() From 5f422846863640947e3436000181eeaeb9e5e180 Mon Sep 17 00:00:00 2001 From: yhoogstrate Date: Sun, 20 Feb 2022 12:24:23 +0100 Subject: [PATCH 19/65] fix minor bug --- include/chunked_reader.hpp | 2 +- src/chunked_reader.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/chunked_reader.hpp b/include/chunked_reader.hpp index 2dbc9a56..52c33ff3 100644 --- a/include/chunked_reader.hpp +++ b/include/chunked_reader.hpp @@ -107,7 +107,7 @@ class Context // master chunked_reader public: void TransitionTo(State *state); - Context : state_(nullptr) (const char * arg_filename); + Context (const char * arg_filename); static State* find_state(const char *arg_filename); }; diff --git 
a/src/chunked_reader.cpp b/src/chunked_reader.cpp index c32c877a..7b324dc3 100644 --- a/src/chunked_reader.cpp +++ b/src/chunked_reader.cpp @@ -260,7 +260,7 @@ size_t chunked_reader::tell() -Context::Context(const char * arg_filename) +Context::Context(const char * arg_filename) : state_(nullptr) { printf("Constructor alive\n"); From 7679a25905443749411f555cc25ba2e734f00ece Mon Sep 17 00:00:00 2001 From: yhoogstrate Date: Sun, 20 Feb 2022 12:30:45 +0100 Subject: [PATCH 20/65] elegant state init --- include/chunked_reader.hpp | 4 ++-- src/chunked_reader.cpp | 14 +++++++++++++- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/include/chunked_reader.hpp b/include/chunked_reader.hpp index 52c33ff3..d8ebd21e 100644 --- a/include/chunked_reader.hpp +++ b/include/chunked_reader.hpp @@ -106,8 +106,8 @@ class Context // master chunked_reader State *state_; public: - void TransitionTo(State *state); - Context (const char * arg_filename); + void TransitionTo(State *state); // @todo rename to set_compression_type + Context(const char * arg_filename); static State* find_state(const char *arg_filename); }; diff --git a/src/chunked_reader.cpp b/src/chunked_reader.cpp index 7b324dc3..5bc19704 100644 --- a/src/chunked_reader.cpp +++ b/src/chunked_reader.cpp @@ -264,7 +264,19 @@ Context::Context(const char * arg_filename) : state_(nullptr) { printf("Constructor alive\n"); - State *state = Context::find_state(arg_filename); + this->TransitionTo(Context::find_state(arg_filename)); +} + +void Context::TransitionTo(State *state) { + std::cout << "Context: Transition to " << typeid(*state).name() << ".\n"; + + if (this->state_ != nullptr) + { + delete this->state_; // delete and destruct previous state, incl file points, should also run fh.close(); etc. 
+ } + + this->state_ = state; + //this->state_->set_context(this); } State * Context::find_state(const char * arg_filename) From 3d4dd40d3d937b0052eb10f3b53aa208dc2c49dd Mon Sep 17 00:00:00 2001 From: yhoogstrate Date: Sun, 20 Feb 2022 20:18:13 +0100 Subject: [PATCH 21/65] expand State pattern --- include/chunked_reader.hpp | 34 ++++++++++++++++++++++++++++------ src/chunked_reader.cpp | 29 +++++++++++++++++------------ 2 files changed, 45 insertions(+), 18 deletions(-) diff --git a/include/chunked_reader.hpp b/include/chunked_reader.hpp index d8ebd21e..b1f4f3f8 100644 --- a/include/chunked_reader.hpp +++ b/include/chunked_reader.hpp @@ -75,39 +75,61 @@ class chunked_reader // dit is Context + +class Context; + class State { +protected: + Context *context; // back-reference to context, to access file_i etc. + public: - public: virtual ~State() {}; - void set_context();// - + void set_context(Context *); + + + // virtual functions: virtual void update_buffer() = 0; }; // comrpession type + + class ContextUncompressed : public State { private: - uint i; // implementation specific integer + std::ifstream *fh_flat; + public: void update_buffer() override; }; class ContextZstdSeekable : public State { +private: + ZSTD_seekable_decompress_init_data* fh_zstd; + public: void update_buffer() override; }; + class Context // master chunked_reader { +private: + char buffer[READ_BUFFER_SIZE + 1]; + + size_t buffer_i; + size_t buffer_n; + + off_t file_i; + protected: - State *state_; + State *state; public: void TransitionTo(State *state); // @todo rename to set_compression_type - Context(const char * arg_filename); + Context(const char * arg_filename) ; static State* find_state(const char *arg_filename); }; diff --git a/src/chunked_reader.cpp b/src/chunked_reader.cpp index 5bc19704..e12fdc0d 100644 --- a/src/chunked_reader.cpp +++ b/src/chunked_reader.cpp @@ -254,36 +254,41 @@ size_t chunked_reader::tell() +void State::set_context(Context *arg_context) +{ + this->context = 
arg_context; +} - - -Context::Context(const char * arg_filename) : state_(nullptr) +Context::Context(const char * arg_filename) : buffer("\0"), buffer_i(0), buffer_n(0), file_i(0), state(nullptr) { printf("Constructor alive\n"); - + this->TransitionTo(Context::find_state(arg_filename)); } -void Context::TransitionTo(State *state) { - std::cout << "Context: Transition to " << typeid(*state).name() << ".\n"; +void Context::TransitionTo(State *arg_state) { + std::cout << "Context: Transition to " << typeid(*arg_state).name() << ".\n"; - if (this->state_ != nullptr) + if (this->state != nullptr) { - delete this->state_; // delete and destruct previous state, incl file points, should also run fh.close(); etc. + delete this->state; // delete and destruct previous state, incl file points, should also run fh.close(); etc. } - this->state_ = state; - //this->state_->set_context(this); + this->state = arg_state; + this->state->set_context(this); } State * Context::find_state(const char * arg_filename) { - if(is_zstd_file(arg_filename)) { + if(is_zstd_file(arg_filename)) + { return new ContextZstdSeekable; - } else { + } + else + { return new ContextUncompressed; } } From f257f9ff26c80ef33ddef6377cf1165b19d6d715 Mon Sep 17 00:00:00 2001 From: yhoogstrate Date: Sun, 20 Feb 2022 20:28:05 +0100 Subject: [PATCH 22/65] expand State pattern --- include/chunked_reader.hpp | 6 ++++- src/chunked_reader.cpp | 28 +++++++++++++++++++++ test/chunked_reader/test_chunked_reader.cpp | 3 ++- 3 files changed, 35 insertions(+), 2 deletions(-) diff --git a/include/chunked_reader.hpp b/include/chunked_reader.hpp index b1f4f3f8..bdd833da 100644 --- a/include/chunked_reader.hpp +++ b/include/chunked_reader.hpp @@ -89,6 +89,7 @@ class State // virtual functions: + virtual void fopen(off_t) = 0; virtual void update_buffer() = 0; }; // comrpession type @@ -98,9 +99,10 @@ class State class ContextUncompressed : public State { private: - std::ifstream *fh_flat; + std::ifstream *fh_flat;// = nullptr; 
public: + void fopen(off_t) override; void update_buffer() override; }; @@ -110,6 +112,7 @@ class ContextZstdSeekable : public State ZSTD_seekable_decompress_init_data* fh_zstd; public: + void fopen(off_t) override; void update_buffer() override; }; @@ -132,6 +135,7 @@ class Context // master chunked_reader Context(const char * arg_filename) ; static State* find_state(const char *arg_filename); + void fopen(off_t); }; diff --git a/src/chunked_reader.cpp b/src/chunked_reader.cpp index e12fdc0d..6f9edde2 100644 --- a/src/chunked_reader.cpp +++ b/src/chunked_reader.cpp @@ -281,6 +281,15 @@ void Context::TransitionTo(State *arg_state) { this->state->set_context(this); } +void Context::fopen(off_t file_offset) +{ + this->state->fopen(file_offset); +} + + + + + State * Context::find_state(const char * arg_filename) { if(is_zstd_file(arg_filename)) @@ -294,10 +303,29 @@ State * Context::find_state(const char * arg_filename) } +void ContextUncompressed::fopen(off_t start_pos) +{ + //this->fh_flat->clear(); // reset error state + + //if(!this->fh_flat->is_open()) { + // this->fh_flat->open(this->filename.c_str(), std::ios::in | std::ios::binary | std::ios::ate); + //} + + //this->fh_flat->seekg(offset, std::ios::beg); + //this->update_flat_buffer(); +} void ContextUncompressed::update_buffer() { printf("hello Uncompr\n"); } + + + void ContextZstdSeekable::update_buffer() { printf("hello ZstdSeek\n"); } + +void ContextZstdSeekable::fopen(off_t start_pos) +{ + +} diff --git a/test/chunked_reader/test_chunked_reader.cpp b/test/chunked_reader/test_chunked_reader.cpp index b37cffb2..94780799 100644 --- a/test/chunked_reader/test_chunked_reader.cpp +++ b/test/chunked_reader/test_chunked_reader.cpp @@ -334,7 +334,8 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__new_style) ////chunked_reader r_flat = chunked_reader(fastafs_file.c_str()); Context c = Context(fastafs_file.c_str()); - //c.findState(); // let it self determine to choose Zstd or Flat + c.fopen(0); // open file handle + } 
} From dafab8b19dccbc0a6aad160f8cd724fc00c8cbf1 Mon Sep 17 00:00:00 2001 From: yhoogstrate Date: Sun, 20 Feb 2022 20:55:15 +0100 Subject: [PATCH 23/65] update OOP structure --- include/chunked_reader.hpp | 14 +++++------ src/chunked_reader.cpp | 28 +++++++++++++-------- test/chunked_reader/test_chunked_reader.cpp | 2 +- 3 files changed, 26 insertions(+), 18 deletions(-) diff --git a/include/chunked_reader.hpp b/include/chunked_reader.hpp index bdd833da..8c86fd96 100644 --- a/include/chunked_reader.hpp +++ b/include/chunked_reader.hpp @@ -89,7 +89,7 @@ class State // virtual functions: - virtual void fopen(off_t) = 0; + virtual void fopen(const char *, off_t) = 0; virtual void update_buffer() = 0; }; // comrpession type @@ -102,7 +102,7 @@ class ContextUncompressed : public State std::ifstream *fh_flat;// = nullptr; public: - void fopen(off_t) override; + void fopen(const char *, off_t) override; void update_buffer() override; }; @@ -112,7 +112,7 @@ class ContextZstdSeekable : public State ZSTD_seekable_decompress_init_data* fh_zstd; public: - void fopen(off_t) override; + void fopen(const char *, off_t) override; void update_buffer() override; }; @@ -131,11 +131,11 @@ class Context // master chunked_reader State *state; public: - void TransitionTo(State *state); // @todo rename to set_compression_type - Context(const char * arg_filename) ; + void TransitionTo(State *); // @todo rename to set_compression_type + Context(const char *) ; - static State* find_state(const char *arg_filename); - void fopen(off_t); + static State* find_state(const char *); + void fopen(const char *, off_t); }; diff --git a/src/chunked_reader.cpp b/src/chunked_reader.cpp index 6f9edde2..7ec05127 100644 --- a/src/chunked_reader.cpp +++ b/src/chunked_reader.cpp @@ -281,9 +281,9 @@ void Context::TransitionTo(State *arg_state) { this->state->set_context(this); } -void Context::fopen(off_t file_offset) +void Context::fopen(const char * arg_filename, off_t file_offset) { - 
this->state->fopen(file_offset); + this->state->fopen(arg_filename, file_offset); } @@ -303,16 +303,24 @@ State * Context::find_state(const char * arg_filename) } -void ContextUncompressed::fopen(off_t start_pos) +void ContextUncompressed::fopen(const char * arg_filename, off_t start_pos) { - //this->fh_flat->clear(); // reset error state + if(this->fh_flat != nullptr) + { + throw std::runtime_error("[ContextUncompressed::fopen] opening a non closed reader.\n"); + } - //if(!this->fh_flat->is_open()) { - // this->fh_flat->open(this->filename.c_str(), std::ios::in | std::ios::binary | std::ios::ate); - //} + this->fh_flat = new std::ifstream; + this->fh_flat->open(arg_filename, std::ios::in | std::ios::binary | std::ios::ate); - //this->fh_flat->seekg(offset, std::ios::beg); - //this->update_flat_buffer(); + /* + if(this->fh_flat->is_open()) { + this->fh_flat->seekg(0, std::ios::beg); + this->update_flat_buffer(); + } else { + throw std::runtime_error("[chunked_reader::init] Cannot open file for reading.\n"); + } + */ } void ContextUncompressed::update_buffer() { @@ -325,7 +333,7 @@ void ContextZstdSeekable::update_buffer() { printf("hello ZstdSeek\n"); } -void ContextZstdSeekable::fopen(off_t start_pos) +void ContextZstdSeekable::fopen(const char * arg_filename, off_t start_pos) { } diff --git a/test/chunked_reader/test_chunked_reader.cpp b/test/chunked_reader/test_chunked_reader.cpp index 94780799..98910cb3 100644 --- a/test/chunked_reader/test_chunked_reader.cpp +++ b/test/chunked_reader/test_chunked_reader.cpp @@ -334,7 +334,7 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__new_style) ////chunked_reader r_flat = chunked_reader(fastafs_file.c_str()); Context c = Context(fastafs_file.c_str()); - c.fopen(0); // open file handle + c.fopen(fastafs_file.c_str(), 0); // open file handle } } From 46d61322b08ddba72248bdace4da438394ef2c0b Mon Sep 17 00:00:00 2001 From: yhoogstrate Date: Wed, 23 Feb 2022 10:41:37 +0100 Subject: [PATCH 24/65] filename as context property --- 
include/chunked_reader.hpp | 17 ++++++++------ src/chunked_reader.cpp | 25 ++++++++++++--------- test/chunked_reader/test_chunked_reader.cpp | 2 +- 3 files changed, 26 insertions(+), 18 deletions(-) diff --git a/include/chunked_reader.hpp b/include/chunked_reader.hpp index 8c86fd96..eb688884 100644 --- a/include/chunked_reader.hpp +++ b/include/chunked_reader.hpp @@ -89,7 +89,7 @@ class State // virtual functions: - virtual void fopen(const char *, off_t) = 0; + virtual void fopen(off_t) = 0; virtual void update_buffer() = 0; }; // comrpession type @@ -102,7 +102,7 @@ class ContextUncompressed : public State std::ifstream *fh_flat;// = nullptr; public: - void fopen(const char *, off_t) override; + void fopen(off_t) override; void update_buffer() override; }; @@ -112,14 +112,16 @@ class ContextZstdSeekable : public State ZSTD_seekable_decompress_init_data* fh_zstd; public: - void fopen(const char *, off_t) override; + void fopen(off_t) override; void update_buffer() override; }; class Context // master chunked_reader { -private: +protected: + std::string filename; + char buffer[READ_BUFFER_SIZE + 1]; size_t buffer_i; @@ -127,15 +129,16 @@ class Context // master chunked_reader off_t file_i; -protected: State *state; public: void TransitionTo(State *); // @todo rename to set_compression_type Context(const char *) ; - static State* find_state(const char *); - void fopen(const char *, off_t); + State* find_state(); + void fopen(off_t); + + const std::string& get_filename(); }; diff --git a/src/chunked_reader.cpp b/src/chunked_reader.cpp index 7ec05127..15378848 100644 --- a/src/chunked_reader.cpp +++ b/src/chunked_reader.cpp @@ -262,11 +262,16 @@ void State::set_context(Context *arg_context) -Context::Context(const char * arg_filename) : buffer("\0"), buffer_i(0), buffer_n(0), file_i(0), state(nullptr) +Context::Context(const char * arg_filename) : filename(arg_filename), buffer("\0"), buffer_i(0), buffer_n(0), file_i(0), state(nullptr) { printf("Constructor alive\n"); 
- this->TransitionTo(Context::find_state(arg_filename)); + this->TransitionTo(this->find_state()); +} + +const std::string& Context::get_filename() +{ + return this->filename; } void Context::TransitionTo(State *arg_state) { @@ -281,18 +286,18 @@ void Context::TransitionTo(State *arg_state) { this->state->set_context(this); } -void Context::fopen(const char * arg_filename, off_t file_offset) +void Context::fopen(off_t file_offset) { - this->state->fopen(arg_filename, file_offset); + this->state->fopen(file_offset); } -State * Context::find_state(const char * arg_filename) +State * Context::find_state() { - if(is_zstd_file(arg_filename)) + if(is_zstd_file(this->filename.c_str())) { return new ContextZstdSeekable; } @@ -303,7 +308,7 @@ State * Context::find_state(const char * arg_filename) } -void ContextUncompressed::fopen(const char * arg_filename, off_t start_pos) +void ContextUncompressed::fopen(off_t start_pos) { if(this->fh_flat != nullptr) { @@ -311,7 +316,7 @@ void ContextUncompressed::fopen(const char * arg_filename, off_t start_pos) } this->fh_flat = new std::ifstream; - this->fh_flat->open(arg_filename, std::ios::in | std::ios::binary | std::ios::ate); + this->fh_flat->open(this->context->get_filename().c_str(), std::ios::in | std::ios::binary | std::ios::ate); /* if(this->fh_flat->is_open()) { @@ -333,7 +338,7 @@ void ContextZstdSeekable::update_buffer() { printf("hello ZstdSeek\n"); } -void ContextZstdSeekable::fopen(const char * arg_filename, off_t start_pos) +void ContextZstdSeekable::fopen(off_t start_pos) { - + throw std::runtime_error("[ContextZstdSeekable::fopen] not implemented.\n"); } diff --git a/test/chunked_reader/test_chunked_reader.cpp b/test/chunked_reader/test_chunked_reader.cpp index 98910cb3..94780799 100644 --- a/test/chunked_reader/test_chunked_reader.cpp +++ b/test/chunked_reader/test_chunked_reader.cpp @@ -334,7 +334,7 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__new_style) ////chunked_reader r_flat = 
chunked_reader(fastafs_file.c_str()); Context c = Context(fastafs_file.c_str()); - c.fopen(fastafs_file.c_str(), 0); // open file handle + c.fopen(0); // open file handle } } From 791aa3b9cba5d92e717645b3969ffddf6b333462 Mon Sep 17 00:00:00 2001 From: yhoogstrate Date: Wed, 23 Feb 2022 10:55:11 +0100 Subject: [PATCH 25/65] fopen working for non compressed files --- include/chunked_reader.hpp | 4 +++- src/chunked_reader.cpp | 39 ++++++++++++++++++++++++++++---------- 2 files changed, 32 insertions(+), 11 deletions(-) diff --git a/include/chunked_reader.hpp b/include/chunked_reader.hpp index eb688884..205e0ea1 100644 --- a/include/chunked_reader.hpp +++ b/include/chunked_reader.hpp @@ -99,11 +99,13 @@ class State class ContextUncompressed : public State { private: - std::ifstream *fh_flat;// = nullptr; + std::ifstream *fh = nullptr; public: void fopen(off_t) override; void update_buffer() override; + + ~ContextUncompressed() override; }; class ContextZstdSeekable : public State diff --git a/src/chunked_reader.cpp b/src/chunked_reader.cpp index 15378848..9a019519 100644 --- a/src/chunked_reader.cpp +++ b/src/chunked_reader.cpp @@ -308,30 +308,49 @@ State * Context::find_state() } -void ContextUncompressed::fopen(off_t start_pos) +void ContextUncompressed::fopen(off_t start_pos = 0) { - if(this->fh_flat != nullptr) + if(this->fh != nullptr) { throw std::runtime_error("[ContextUncompressed::fopen] opening a non closed reader.\n"); } - this->fh_flat = new std::ifstream; - this->fh_flat->open(this->context->get_filename().c_str(), std::ios::in | std::ios::binary | std::ios::ate); + this->fh = new std::ifstream; + this->fh->open(this->context->get_filename().c_str(), std::ios::in | std::ios::binary | std::ios::ate); - /* - if(this->fh_flat->is_open()) { - this->fh_flat->seekg(0, std::ios::beg); - this->update_flat_buffer(); + + if(this->fh->is_open()) { + this->fh->seekg(start_pos, std::ios::beg); + //this->context->update_flat_buffer(); } else { throw 
std::runtime_error("[chunked_reader::init] Cannot open file for reading.\n"); } - */ } -void ContextUncompressed::update_buffer() { +void ContextUncompressed::update_buffer() +{ printf("hello Uncompr\n"); } +ContextUncompressed::~ContextUncompressed() +{ + if(this->fh != nullptr) + { + if(!this->fh) + { + this->fh->close(); + throw std::runtime_error("[ContextUncompressed::~ContextUncompressed] unexpected closed filehandle found.\n"); + } + else + { + this->fh->close(); + } + + delete this->fh; + } +} + + void ContextZstdSeekable::update_buffer() { From a1143220014996d9acd31f95920bd4c82ff96afa Mon Sep 17 00:00:00 2001 From: yhoogstrate Date: Wed, 23 Feb 2022 11:03:20 +0100 Subject: [PATCH 26/65] flat format reads into main context buffer --- include/chunked_reader.hpp | 11 ++++++----- src/chunked_reader.cpp | 15 ++++++++++++--- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/include/chunked_reader.hpp b/include/chunked_reader.hpp index 205e0ea1..485a92f2 100644 --- a/include/chunked_reader.hpp +++ b/include/chunked_reader.hpp @@ -81,7 +81,7 @@ class Context; class State { protected: - Context *context; // back-reference to context, to access file_i etc. + Context *context; // back-reference to context, to access file_i, filename etc. 
public: virtual ~State() {}; @@ -90,7 +90,7 @@ class State // virtual functions: virtual void fopen(off_t) = 0; - virtual void update_buffer() = 0; + virtual void read_into_buffer() = 0; // formerly update_..._buffer }; // comrpession type @@ -103,7 +103,7 @@ class ContextUncompressed : public State public: void fopen(off_t) override; - void update_buffer() override; + void read_into_buffer() override; ~ContextUncompressed() override; }; @@ -111,11 +111,11 @@ class ContextUncompressed : public State class ContextZstdSeekable : public State { private: - ZSTD_seekable_decompress_init_data* fh_zstd; + ZSTD_seekable_decompress_init_data* fh; public: void fopen(off_t) override; - void update_buffer() override; + void read_into_buffer() override; }; @@ -141,6 +141,7 @@ class Context // master chunked_reader void fopen(off_t); const std::string& get_filename(); + char * get_buffer(); }; diff --git a/src/chunked_reader.cpp b/src/chunked_reader.cpp index 9a019519..dee1de8a 100644 --- a/src/chunked_reader.cpp +++ b/src/chunked_reader.cpp @@ -274,6 +274,11 @@ const std::string& Context::get_filename() return this->filename; } +char * Context::get_buffer() +{ + return &(this->buffer[0]); +} + void Context::TransitionTo(State *arg_state) { std::cout << "Context: Transition to " << typeid(*arg_state).name() << ".\n"; @@ -327,9 +332,13 @@ void ContextUncompressed::fopen(off_t start_pos = 0) } } -void ContextUncompressed::update_buffer() +void ContextUncompressed::read_into_buffer() { - printf("hello Uncompr\n"); + this->fh->read(this->context->get_buffer(), READ_BUFFER_SIZE); + + //this->buffer_i = 0; + //this->buffer_n = (size_t) this->fh->gcount(); + //this->file_i += this->buffer_n; } ContextUncompressed::~ContextUncompressed() @@ -353,7 +362,7 @@ ContextUncompressed::~ContextUncompressed() -void ContextZstdSeekable::update_buffer() { +void ContextZstdSeekable::read_into_buffer() { printf("hello ZstdSeek\n"); } From 6395de35baed6c02b3245fa8f3b2c8ff1f35f79f Mon Sep 17 00:00:00 
2001 From: yhoogstrate Date: Wed, 23 Feb 2022 11:16:15 +0100 Subject: [PATCH 27/65] reads to buffer and keeps context properties private, through State pattern --- include/chunked_reader.hpp | 9 +++++---- src/chunked_reader.cpp | 22 ++++++++++++++++------ 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/include/chunked_reader.hpp b/include/chunked_reader.hpp index 485a92f2..720edd91 100644 --- a/include/chunked_reader.hpp +++ b/include/chunked_reader.hpp @@ -90,7 +90,7 @@ class State // virtual functions: virtual void fopen(off_t) = 0; - virtual void read_into_buffer() = 0; // formerly update_..._buffer + virtual size_t read_into_buffer() = 0; // formerly update_..._buffer }; // comrpession type @@ -103,7 +103,7 @@ class ContextUncompressed : public State public: void fopen(off_t) override; - void read_into_buffer() override; + size_t read_into_buffer() override; ~ContextUncompressed() override; }; @@ -115,7 +115,7 @@ class ContextZstdSeekable : public State public: void fopen(off_t) override; - void read_into_buffer() override; + size_t read_into_buffer() override; }; @@ -141,7 +141,8 @@ class Context // master chunked_reader void fopen(off_t); const std::string& get_filename(); - char * get_buffer(); + char* get_buffer(); + size_t read_into_buffer(); }; diff --git a/src/chunked_reader.cpp b/src/chunked_reader.cpp index dee1de8a..c9f0463e 100644 --- a/src/chunked_reader.cpp +++ b/src/chunked_reader.cpp @@ -279,6 +279,16 @@ char * Context::get_buffer() return &(this->buffer[0]); } +size_t Context::read_into_buffer() +{ + this->buffer_n = this->state->read_into_buffer(); + + this->buffer_i = 0; + this->file_i += this->buffer_n; +} + + + void Context::TransitionTo(State *arg_state) { std::cout << "Context: Transition to " << typeid(*arg_state).name() << ".\n"; @@ -326,19 +336,17 @@ void ContextUncompressed::fopen(off_t start_pos = 0) if(this->fh->is_open()) { this->fh->seekg(start_pos, std::ios::beg); - //this->context->update_flat_buffer(); + 
//this->read_into_buffer(); } else { throw std::runtime_error("[chunked_reader::init] Cannot open file for reading.\n"); } } -void ContextUncompressed::read_into_buffer() +size_t ContextUncompressed::read_into_buffer() { this->fh->read(this->context->get_buffer(), READ_BUFFER_SIZE); - //this->buffer_i = 0; - //this->buffer_n = (size_t) this->fh->gcount(); - //this->file_i += this->buffer_n; + return this->fh->gcount(); } ContextUncompressed::~ContextUncompressed() @@ -362,8 +370,10 @@ ContextUncompressed::~ContextUncompressed() -void ContextZstdSeekable::read_into_buffer() { +size_t ContextZstdSeekable::read_into_buffer() { printf("hello ZstdSeek\n"); + + return 0; } void ContextZstdSeekable::fopen(off_t start_pos) From 0be7ab776904d488d7cdcf750947fc0cccac6063 Mon Sep 17 00:00:00 2001 From: yhoogstrate Date: Wed, 23 Feb 2022 17:36:51 +0100 Subject: [PATCH 28/65] small improvements --- include/chunked_reader.hpp | 14 ++-- src/chunked_reader.cpp | 89 +++++++++++++++++++-- test/chunked_reader/test_chunked_reader.cpp | 4 +- 3 files changed, 94 insertions(+), 13 deletions(-) diff --git a/include/chunked_reader.hpp b/include/chunked_reader.hpp index 720edd91..ecd73cce 100644 --- a/include/chunked_reader.hpp +++ b/include/chunked_reader.hpp @@ -90,7 +90,8 @@ class State // virtual functions: virtual void fopen(off_t) = 0; - virtual size_t read_into_buffer() = 0; // formerly update_..._buffer + virtual size_t cache_buffer() = 0; // formerly update_..._buffer + virtual size_t read(char *, size_t, size_t &, size_t &) = 0; }; // comrpession type @@ -103,7 +104,8 @@ class ContextUncompressed : public State public: void fopen(off_t) override; - size_t read_into_buffer() override; + size_t cache_buffer() override; + size_t read(char *, size_t, size_t &, size_t &) override; ~ContextUncompressed() override; }; @@ -111,11 +113,12 @@ class ContextUncompressed : public State class ContextZstdSeekable : public State { private: - ZSTD_seekable_decompress_init_data* fh; + 
ZSTD_seekable_decompress_init_data* fh = nullptr; public: void fopen(off_t) override; - size_t read_into_buffer() override; + size_t cache_buffer() override; + size_t read(char *, size_t, size_t &, size_t &) override; }; @@ -142,7 +145,8 @@ class Context // master chunked_reader const std::string& get_filename(); char* get_buffer(); - size_t read_into_buffer(); + size_t cache_buffer(); + size_t read(char *, size_t); }; diff --git a/src/chunked_reader.cpp b/src/chunked_reader.cpp index c9f0463e..1697f1cb 100644 --- a/src/chunked_reader.cpp +++ b/src/chunked_reader.cpp @@ -279,14 +279,33 @@ char * Context::get_buffer() return &(this->buffer[0]); } -size_t Context::read_into_buffer() +size_t Context::cache_buffer() { - this->buffer_n = this->state->read_into_buffer(); + + printf("[x] this->buffer_i = %i, this->buffer_n = %i, this->file_i = %i\n",this->buffer_i, this->buffer_n, this->file_i); + + size_t s = this->state->cache_buffer(); + printf("\n",s); + this->buffer_n = s; this->buffer_i = 0; - this->file_i += this->buffer_n; + this->file_i += s; + + printf("[y] this->buffer_i = %i, this->buffer_n = %i, this->file_i = %i\n",this->buffer_i, this->buffer_n, this->file_i); } +size_t Context::read(char *arg_buffer, size_t arg_buffer_size) +{ + //arg_buffer_size = std::min(arg_buffer_size, (size_t) READ_BUFFER_SIZE); +#if DEBUG + if(arg_buffer_size > READ_BUFFER_SIZE) + { + throw std::runtime_error("[ContextUncompressed::read] Requested buffer size larger than internal context buffer.\n"); + } +#endif //DEBUG + + return this->state->read(arg_buffer, arg_buffer_size, this->buffer_i, this->buffer_n); +} void Context::TransitionTo(State *arg_state) { @@ -303,7 +322,11 @@ void Context::TransitionTo(State *arg_state) { void Context::fopen(off_t file_offset) { + printf("[1] this->buffer_i = %i, this->buffer_n = %i, this->file_i = %i\n",this->buffer_i, this->buffer_n, this->file_i); this->state->fopen(file_offset); + printf("[2] this->buffer_i = %i, this->buffer_n = %i, 
this->file_i = %i\n",this->buffer_i, this->buffer_n, this->file_i); + this->cache_buffer(); + printf("[3] this->buffer_i = %i, this->buffer_n = %i, this->file_i = %i\n",this->buffer_i, this->buffer_n, this->file_i); } @@ -336,17 +359,62 @@ void ContextUncompressed::fopen(off_t start_pos = 0) if(this->fh->is_open()) { this->fh->seekg(start_pos, std::ios::beg); - //this->read_into_buffer(); } else { throw std::runtime_error("[chunked_reader::init] Cannot open file for reading.\n"); } } -size_t ContextUncompressed::read_into_buffer() +size_t ContextUncompressed::cache_buffer() { this->fh->read(this->context->get_buffer(), READ_BUFFER_SIZE); - return this->fh->gcount(); + if(!this->fh) + { + printf("[%s]\n", this->context->get_buffer()); + throw std::runtime_error("[ContextUncompressed::cache_buffer] Coult not open file. \n"); + } + + printf("ContextUncompressed::cache_buffer\n"); + + size_t s = (size_t) this->fh->gcount(); + printf("[s=%i]\n",s); + + return s; +} + +size_t ContextUncompressed::read(char *arg_buffer_to, size_t arg_buffer_to_size, + size_t &buffer_i, size_t &buffer_n) +{ +#if DEBUG + if(arg_buffer_to_size > READ_BUFFER_SIZE) + { + throw std::runtime_error("[ContextUncompressed::read] Requested buffer size larger than internal context buffer.\n"); + } +#endif //DEBUG + + size_t written = 0; + size_t n = std::min(buffer_n - buffer_i, arg_buffer_to_size); + + printf("buffer_n = %i, buffer_i = %i, arg_buffer_to_size = %i, n = %i\n",buffer_n, buffer_i, arg_buffer_to_size, n); + + // copy current internal buffer completely + while(written < n) + { + arg_buffer_to[written++] = this->context->get_buffer()[buffer_i++]; + } + + if(written < arg_buffer_to_size) { + // + + // same loop again + /* + while(this->buffer_i < this->buffer_n and written < buffer_size) { + arg_buffer[written++] = this->buffer[this->buffer_i++]; + } */ + printf("recursively call another read\n"); + } + + return written; } ContextUncompressed::~ContextUncompressed() @@ -370,7 +438,7 @@ 
ContextUncompressed::~ContextUncompressed() -size_t ContextZstdSeekable::read_into_buffer() { +size_t ContextZstdSeekable::cache_buffer() { printf("hello ZstdSeek\n"); return 0; @@ -380,3 +448,10 @@ void ContextZstdSeekable::fopen(off_t start_pos) { throw std::runtime_error("[ContextZstdSeekable::fopen] not implemented.\n"); } + +size_t ContextZstdSeekable::read(char *arg_buffer_to, size_t arg_buffer_to_size, + size_t &buffer_i, size_t &buffer_n) +{ + return 0; +} + diff --git a/test/chunked_reader/test_chunked_reader.cpp b/test/chunked_reader/test_chunked_reader.cpp index 94780799..5b9939c0 100644 --- a/test/chunked_reader/test_chunked_reader.cpp +++ b/test/chunked_reader/test_chunked_reader.cpp @@ -334,8 +334,10 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__new_style) ////chunked_reader r_flat = chunked_reader(fastafs_file.c_str()); Context c = Context(fastafs_file.c_str()); - c.fopen(0); // open file handle + c.fopen(0); // open file handle and load buffer + written = c.read(buffer, 1024); + printf("written = %i\n", written); } } From 4c77c63e6c903cd57fca297b2b85a9d78de27f7a Mon Sep 17 00:00:00 2001 From: yhoogstrate Date: Wed, 23 Feb 2022 20:40:34 +0100 Subject: [PATCH 29/65] closer, but needs testing --- src/chunked_reader.cpp | 27 ++++---- test.txt | 72 +++++++++++++++++++++ test/chunked_reader/test_chunked_reader.cpp | 20 +++++- 3 files changed, 104 insertions(+), 15 deletions(-) create mode 100644 test.txt diff --git a/src/chunked_reader.cpp b/src/chunked_reader.cpp index 1697f1cb..f63312a7 100644 --- a/src/chunked_reader.cpp +++ b/src/chunked_reader.cpp @@ -393,25 +393,28 @@ size_t ContextUncompressed::read(char *arg_buffer_to, size_t arg_buffer_to_size, #endif //DEBUG size_t written = 0; - size_t n = std::min(buffer_n - buffer_i, arg_buffer_to_size); + const size_t n1 = std::min(buffer_n - buffer_i, arg_buffer_to_size); + const size_t n2 = std::min(buffer_i, arg_buffer_to_size - n1); - printf("buffer_n = %i, buffer_i = %i, arg_buffer_to_size = %i, n = 
%i\n",buffer_n, buffer_i, arg_buffer_to_size, n); + printf("buffer_n = %i, buffer_i = %i, arg_buffer_to_size = %i, n = %i, READ_BUFFER_SIZE=%i\n",buffer_n, buffer_i, arg_buffer_to_size, n1, READ_BUFFER_SIZE); // copy current internal buffer completely - while(written < n) + while(written < n1) { arg_buffer_to[written++] = this->context->get_buffer()[buffer_i++]; } - if(written < arg_buffer_to_size) { - // - - // same loop again - /* - while(this->buffer_i < this->buffer_n and written < buffer_size) { - arg_buffer[written++] = this->buffer[this->buffer_i++]; - } */ - printf("recursively call another read\n"); + if(n2 > 0) + { + printf("this->buffer_i = %i\n",buffer_i); + this->context->cache_buffer(); + printf("this->buffer_i = %i\n",buffer_i); + + while(written < (n1 + n2)) + { + arg_buffer_to[written++] = this->context->get_buffer()[buffer_i++]; + } + printf("recursively call another read :: %i\n", n2); } return written; diff --git a/test.txt b/test.txt new file mode 100644 index 00000000..faab0535 --- /dev/null +++ b/test.txt @@ -0,0 +1,72 @@ +111111111111111111111111111111111111111111111111111111111111111 +111111111111111111111111111111111111111111111111111111111111111 +111111111111111111111111111111111111111111111111111111111111111 +111111111111111111111111111111111111111111111111111111111111111 +111111111111111111111111111111111111111111111111111111111111111 +111111111111111111111111111111111111111111111111111111111111111 +111111111111111111111111111111111111111111111111111111111111111 +111111111111111111111111111111111111111111111111111111111111111 +111111111111111111111111111111111111111111111111111111111111111 +111111111111111111111111111111111111111111111111111111111111111 +111111111111111111111111111111111111111111111111111111111111111 +111111111111111111111111111111111111111111111111111111111111111 +111111111111111111111111111111111111111111111111111111111111111 +111111111111111111111111111111111111111111111111111111111111111 
+111111111111111111111111111111111111111111111111111111111111111 +11111111111111111111111111111111111111111111111 +222222222222222222222222222222222222222222222222222222222222222 +222222222222222222222222222222222222222222222222222222222222222 +222222222222222222222222222222222222222222222222222222222222222 +222222222222222222222222222222222222222222222222222222222222222 +222222222222222222222222222222222222222222222222222222222222222 +222222222222222222222222222222222222222222222222222222222222222 +222222222222222222222222222222222222222222222222222222222222222 +222222222222222222222222222222222222222222222222222222222222222 +222222222222222222222222222222222222222222222222222222222222222 +222222222222222222222222222222222222222222222222222222222222222 +222222222222222222222222222222222222222222222222222222222222222 +222222222222222222222222222222222222222222222222222222222222222 +222222222222222222222222222222222222222222222222222222222222222 +222222222222222222222222222222222222222222222222222222222222222 +222222222222222222222222222222222222222222222222222222222222222 +22222222222222222222222222222222222222222222222 +333333333333333333333333333333333333333333333333333333333333333 +333333333333333333333333333333333333333333333333333333333333333 +333333333333333333333333333333333333333333333333333333333333333 +333333333333333333333333333333333333333333333333333333333333333 +333333333333333333333333333333333333333333333333333333333333333 +333333333333333333333333333333333333333333333333333333333333333 +333333333333333333333333333333333333333333333333333333333333333 +333333333333333333333333333333333333333333333333333333333333333 +333333333333333333333333333333333333333333333333333333333333333 +333333333333333333333333333333333333333333333333333333333333333 +333333333333333333333333333333333333333333333333333333333333333 +333333333333333333333333333333333333333333333333333333333333333 +333333333333333333333333333333333333333333333333333333333333333 
+333333333333333333333333333333333333333333333333333333333333333 +333333333333333333333333333333333333333333333333333333333333333 +33333333333333333333333333333333333333333333333 +444444444444444444444444444444444444444444444444444444444444444 +444444444444444444444444444444444444444444444444444444444444444 +444444444444444444444444444444444444444444444444444444444444444 +444444444444444444444444444444444444444444444444444444444444444 +444444444444444444444444444444444444444444444444444444444444444 +444444444444444444444444444444444444444444444444444444444444444 +444444444444444444444444444444444444444444444444444444444444444 +444444444444444444444444444444444444444444444444444444444444444 +444444444444444444444444444444444444444444444444444444444444444 +444444444444444444444444444444444444444444444444444444444444444 +444444444444444444444444444444444444444444444444444444444444444 +444444444444444444444444444444444444444444444444444444444444444 +444444444444444444444444444444444444444444444444444444444444444 +444444444444444444444444444444444444444444444444444444444444444 +444444444444444444444444444444444444444444444444444444444444444 +44444444444444444444444444444444444444444444444 +||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +||||||||||||||||||||||||||||||||||||||||||||||||||||?:+ diff --git a/test/chunked_reader/test_chunked_reader.cpp b/test/chunked_reader/test_chunked_reader.cpp index 5b9939c0..7d22089f 100644 --- a/test/chunked_reader/test_chunked_reader.cpp +++ b/test/chunked_reader/test_chunked_reader.cpp @@ -333,11 +333,25 @@ 
BOOST_AUTO_TEST_CASE(test_chunked_reader__new_style) { ////chunked_reader r_flat = chunked_reader(fastafs_file.c_str()); - Context c = Context(fastafs_file.c_str()); + std::string tf = "test.txt"; + + //Context c = Context(fastafs_file.c_str()); + Context c = Context(tf.c_str()); c.fopen(0); // open file handle and load buffer - written = c.read(buffer, 1024); - printf("written = %i\n", written); + written = c.read(buffer, 10); + buffer[written] = '\0'; + printf("[%s]\n%i\n",buffer,written); + + written = c.read(buffer, 1024*4); + buffer[written] = '\0'; + printf("[%s]\n%i\n",buffer,written); + + written = c.read(buffer, 1024*4); + buffer[written] = '\0'; + printf("[%s]\n%i\n",buffer,written); + + //printf("written = %i\n", written); } } From 0b174639361892cf3f15cb77fd343a79cc68cd89 Mon Sep 17 00:00:00 2001 From: yhoogstrate Date: Wed, 23 Feb 2022 21:28:25 +0100 Subject: [PATCH 30/65] working prototype --- src/chunked_reader.cpp | 25 ++----- test.txt | 73 +-------------------- test/chunked_reader/test_chunked_reader.cpp | 15 ++--- 3 files changed, 15 insertions(+), 98 deletions(-) diff --git a/src/chunked_reader.cpp b/src/chunked_reader.cpp index f63312a7..357d51ae 100644 --- a/src/chunked_reader.cpp +++ b/src/chunked_reader.cpp @@ -281,17 +281,12 @@ char * Context::get_buffer() size_t Context::cache_buffer() { - - printf("[x] this->buffer_i = %i, this->buffer_n = %i, this->file_i = %i\n",this->buffer_i, this->buffer_n, this->file_i); - size_t s = this->state->cache_buffer(); - printf("\n",s); + printf(" - gelezen bytes?\n",s); this->buffer_n = s; this->buffer_i = 0; this->file_i += s; - - printf("[y] this->buffer_i = %i, this->buffer_n = %i, this->file_i = %i\n",this->buffer_i, this->buffer_n, this->file_i); } size_t Context::read(char *arg_buffer, size_t arg_buffer_size) @@ -370,14 +365,10 @@ size_t ContextUncompressed::cache_buffer() if(!this->fh) { - printf("[%s]\n", this->context->get_buffer()); throw std::runtime_error("[ContextUncompressed::cache_buffer] 
Coult not open file. \n"); } - - printf("ContextUncompressed::cache_buffer\n"); size_t s = (size_t) this->fh->gcount(); - printf("[s=%i]\n",s); return s; } @@ -394,9 +385,8 @@ size_t ContextUncompressed::read(char *arg_buffer_to, size_t arg_buffer_to_size, size_t written = 0; const size_t n1 = std::min(buffer_n - buffer_i, arg_buffer_to_size); - const size_t n2 = std::min(buffer_i, arg_buffer_to_size - n1); - printf("buffer_n = %i, buffer_i = %i, arg_buffer_to_size = %i, n = %i, READ_BUFFER_SIZE=%i\n",buffer_n, buffer_i, arg_buffer_to_size, n1, READ_BUFFER_SIZE); + //printf("buffer_n = %i, buffer_i = %i, arg_buffer_to_size = %i, n1 = %i, n2 = %i READ_BUFFER_SIZE=%i\n",buffer_n, buffer_i, arg_buffer_to_size, n1, n2, READ_BUFFER_SIZE); // copy current internal buffer completely while(written < n1) @@ -404,17 +394,16 @@ size_t ContextUncompressed::read(char *arg_buffer_to, size_t arg_buffer_to_size, arg_buffer_to[written++] = this->context->get_buffer()[buffer_i++]; } - if(n2 > 0) + if(written < arg_buffer_to_size) { - printf("this->buffer_i = %i\n",buffer_i); this->context->cache_buffer(); - printf("this->buffer_i = %i\n",buffer_i); - - while(written < (n1 + n2)) + + while(buffer_i < buffer_n and written < arg_buffer_to_size) { arg_buffer_to[written++] = this->context->get_buffer()[buffer_i++]; } - printf("recursively call another read :: %i\n", n2); + + //printf("recursively call another read :: %i\n", n2); } return written; diff --git a/test.txt b/test.txt index faab0535..49ff60a6 100644 --- a/test.txt +++ b/test.txt @@ -1,72 +1 @@ -111111111111111111111111111111111111111111111111111111111111111 -111111111111111111111111111111111111111111111111111111111111111 -111111111111111111111111111111111111111111111111111111111111111 -111111111111111111111111111111111111111111111111111111111111111 -111111111111111111111111111111111111111111111111111111111111111 -111111111111111111111111111111111111111111111111111111111111111 
-111111111111111111111111111111111111111111111111111111111111111 -111111111111111111111111111111111111111111111111111111111111111 -111111111111111111111111111111111111111111111111111111111111111 -111111111111111111111111111111111111111111111111111111111111111 -111111111111111111111111111111111111111111111111111111111111111 -111111111111111111111111111111111111111111111111111111111111111 -111111111111111111111111111111111111111111111111111111111111111 -111111111111111111111111111111111111111111111111111111111111111 -111111111111111111111111111111111111111111111111111111111111111 -11111111111111111111111111111111111111111111111 -222222222222222222222222222222222222222222222222222222222222222 -222222222222222222222222222222222222222222222222222222222222222 -222222222222222222222222222222222222222222222222222222222222222 -222222222222222222222222222222222222222222222222222222222222222 -222222222222222222222222222222222222222222222222222222222222222 -222222222222222222222222222222222222222222222222222222222222222 -222222222222222222222222222222222222222222222222222222222222222 -222222222222222222222222222222222222222222222222222222222222222 -222222222222222222222222222222222222222222222222222222222222222 -222222222222222222222222222222222222222222222222222222222222222 -222222222222222222222222222222222222222222222222222222222222222 -222222222222222222222222222222222222222222222222222222222222222 -222222222222222222222222222222222222222222222222222222222222222 -222222222222222222222222222222222222222222222222222222222222222 -222222222222222222222222222222222222222222222222222222222222222 -22222222222222222222222222222222222222222222222 -333333333333333333333333333333333333333333333333333333333333333 -333333333333333333333333333333333333333333333333333333333333333 -333333333333333333333333333333333333333333333333333333333333333 -333333333333333333333333333333333333333333333333333333333333333 -333333333333333333333333333333333333333333333333333333333333333 
-333333333333333333333333333333333333333333333333333333333333333 -333333333333333333333333333333333333333333333333333333333333333 -333333333333333333333333333333333333333333333333333333333333333 -333333333333333333333333333333333333333333333333333333333333333 -333333333333333333333333333333333333333333333333333333333333333 -333333333333333333333333333333333333333333333333333333333333333 -333333333333333333333333333333333333333333333333333333333333333 -333333333333333333333333333333333333333333333333333333333333333 -333333333333333333333333333333333333333333333333333333333333333 -333333333333333333333333333333333333333333333333333333333333333 -33333333333333333333333333333333333333333333333 -444444444444444444444444444444444444444444444444444444444444444 -444444444444444444444444444444444444444444444444444444444444444 -444444444444444444444444444444444444444444444444444444444444444 -444444444444444444444444444444444444444444444444444444444444444 -444444444444444444444444444444444444444444444444444444444444444 -444444444444444444444444444444444444444444444444444444444444444 -444444444444444444444444444444444444444444444444444444444444444 -444444444444444444444444444444444444444444444444444444444444444 -444444444444444444444444444444444444444444444444444444444444444 -444444444444444444444444444444444444444444444444444444444444444 -444444444444444444444444444444444444444444444444444444444444444 -444444444444444444444444444444444444444444444444444444444444444 -444444444444444444444444444444444444444444444444444444444444444 -444444444444444444444444444444444444444444444444444444444444444 -444444444444444444444444444444444444444444444444444444444444444 -44444444444444444444444444444444444444444444444 -||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| -||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| -||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| -||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| 
-||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| -||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| -||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| -||||||||||||||||||||||||||||||||||||||||||||||||||||?:+ +11111111112222222222333333333344444444445555555555666666666677777777778888888888999999999900000000001111111111222222222233333333334444444444555555xyz diff --git a/test/chunked_reader/test_chunked_reader.cpp b/test/chunked_reader/test_chunked_reader.cpp index 7d22089f..7ce2f312 100644 --- a/test/chunked_reader/test_chunked_reader.cpp +++ b/test/chunked_reader/test_chunked_reader.cpp @@ -341,17 +341,16 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__new_style) written = c.read(buffer, 10); buffer[written] = '\0'; - printf("[%s]\n%i\n",buffer,written); + printf("\n[%s]\n%i\n",buffer,written); - written = c.read(buffer, 1024*4); + written = c.read(buffer, 100); buffer[written] = '\0'; - printf("[%s]\n%i\n",buffer,written); - - written = c.read(buffer, 1024*4); + printf("\n[%s]\n%i\n",buffer,written); + + written = c.read(buffer, 100); buffer[written] = '\0'; - printf("[%s]\n%i\n",buffer,written); - - //printf("written = %i\n", written); + printf("\n[%s]\n%i\n",buffer,written); + } } From a07d4765c2624434260a2337330326a95db369fe Mon Sep 17 00:00:00 2001 From: yhoogstrate Date: Thu, 24 Feb 2022 09:09:33 +0100 Subject: [PATCH 31/65] implements seek and tell for flat file --- include/chunked_reader.hpp | 10 ++++++++- src/chunked_reader.cpp | 46 +++++++++++++++++++++++++++++++------- 2 files changed, 47 insertions(+), 9 deletions(-) diff --git a/include/chunked_reader.hpp b/include/chunked_reader.hpp index ecd73cce..0890c611 100644 --- a/include/chunked_reader.hpp +++ b/include/chunked_reader.hpp @@ -92,6 +92,7 @@ class State virtual void fopen(off_t) = 0; virtual size_t cache_buffer() = 0; // formerly update_..._buffer virtual size_t read(char *, size_t, size_t &, size_t &) = 0; + virtual void seek(off_t) = 0; }; // comrpession 
type @@ -106,6 +107,7 @@ class ContextUncompressed : public State void fopen(off_t) override; size_t cache_buffer() override; size_t read(char *, size_t, size_t &, size_t &) override; + void seek(off_t); ~ContextUncompressed() override; }; @@ -119,6 +121,9 @@ class ContextZstdSeekable : public State void fopen(off_t) override; size_t cache_buffer() override; size_t read(char *, size_t, size_t &, size_t &) override; + void seek(off_t); + + ~ContextZstdSeekable() override; }; @@ -141,12 +146,15 @@ class Context // master chunked_reader Context(const char *) ; State* find_state(); - void fopen(off_t); const std::string& get_filename(); char* get_buffer(); + + void fopen(off_t); size_t cache_buffer(); size_t read(char *, size_t); + void seek(off_t); + size_t tell(); }; diff --git a/src/chunked_reader.cpp b/src/chunked_reader.cpp index 357d51ae..15f055b9 100644 --- a/src/chunked_reader.cpp +++ b/src/chunked_reader.cpp @@ -317,14 +317,21 @@ void Context::TransitionTo(State *arg_state) { void Context::fopen(off_t file_offset) { - printf("[1] this->buffer_i = %i, this->buffer_n = %i, this->file_i = %i\n",this->buffer_i, this->buffer_n, this->file_i); - this->state->fopen(file_offset); - printf("[2] this->buffer_i = %i, this->buffer_n = %i, this->file_i = %i\n",this->buffer_i, this->buffer_n, this->file_i); - this->cache_buffer(); - printf("[3] this->buffer_i = %i, this->buffer_n = %i, this->file_i = %i\n",this->buffer_i, this->buffer_n, this->file_i); + this->state->fopen(file_offset); // open file handle + this->cache_buffer(); // read into buffer } +void Context::seek(off_t arg_offset) +{ + this->state->seek(arg_offset); +} + +size_t Context::tell() +{ + return this->file_i - this->buffer_n + this->buffer_i; +} + @@ -384,7 +391,7 @@ size_t ContextUncompressed::read(char *arg_buffer_to, size_t arg_buffer_to_size, #endif //DEBUG size_t written = 0; - const size_t n1 = std::min(buffer_n - buffer_i, arg_buffer_to_size); + const size_t n1 = std::min(buffer_n - buffer_i, 
arg_buffer_to_size);// number of characters to copy //printf("buffer_n = %i, buffer_i = %i, arg_buffer_to_size = %i, n1 = %i, n2 = %i READ_BUFFER_SIZE=%i\n",buffer_n, buffer_i, arg_buffer_to_size, n1, n2, READ_BUFFER_SIZE); @@ -409,6 +416,17 @@ size_t ContextUncompressed::read(char *arg_buffer_to, size_t arg_buffer_to_size, return written; } +void ContextUncompressed::seek(off_t arg_offset) +{ + if(!this->fh->is_open()) + { + throw std::runtime_error("[ContextUncompressed::seek] unexpected closed filehandle found.\n"); + } + + this->fh->seekg(arg_offset, std::ios::beg); +} + + ContextUncompressed::~ContextUncompressed() { if(this->fh != nullptr) @@ -430,8 +448,9 @@ ContextUncompressed::~ContextUncompressed() -size_t ContextZstdSeekable::cache_buffer() { - printf("hello ZstdSeek\n"); +size_t ContextZstdSeekable::cache_buffer() +{ + throw std::runtime_error("[ContextZstdSeekable::cache_buffer] not implemented.\n"); return 0; } @@ -444,6 +463,17 @@ void ContextZstdSeekable::fopen(off_t start_pos) size_t ContextZstdSeekable::read(char *arg_buffer_to, size_t arg_buffer_to_size, size_t &buffer_i, size_t &buffer_n) { + throw std::runtime_error("[ContextZstdSeekable::read] not implemented.\n"); + return 0; } +void ContextZstdSeekable::seek(off_t arg_offset) +{ + throw std::runtime_error("[ContextZstdSeekable::seek] not implemented.\n"); +} + +ContextZstdSeekable::~ContextZstdSeekable() +{ + throw std::runtime_error("[ContextUncompressed::~ContextUncompressed] not implemented.\n"); +} From 932719c373490803d81c5e1230b65ff0c442ef05 Mon Sep 17 00:00:00 2001 From: yhoogstrate Date: Wed, 2 Mar 2022 10:50:29 +0100 Subject: [PATCH 32/65] update --- src/chunked_reader.cpp | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/src/chunked_reader.cpp b/src/chunked_reader.cpp index 15f055b9..52a5e528 100644 --- a/src/chunked_reader.cpp +++ b/src/chunked_reader.cpp @@ -359,9 +359,12 @@ void ContextUncompressed::fopen(off_t start_pos = 0) 
this->fh->open(this->context->get_filename().c_str(), std::ios::in | std::ios::binary | std::ios::ate); - if(this->fh->is_open()) { - this->fh->seekg(start_pos, std::ios::beg); - } else { + if(this->fh->is_open()) + { + this->seek(start_pos); + } + else + { throw std::runtime_error("[chunked_reader::init] Cannot open file for reading.\n"); } } @@ -457,6 +460,25 @@ size_t ContextZstdSeekable::cache_buffer() void ContextZstdSeekable::fopen(off_t start_pos) { + + if(this->fh != nullptr) + { + throw std::runtime_error("[ContextZstdSeekable::fopen] opening a non closed reader.\n"); + } + + //this->fh = new std::ifstream; + //this->fh->open(this->context->get_filename().c_str(), std::ios::in | std::ios::binary | std::ios::ate); + + + //if(this->fh->is_open()) + //{ + // this->fh->seekg(start_pos, std::ios::beg); + //} + //else + //{ + // throw std::runtime_error("[chunked_reader::init] Cannot open file for reading.\n"); + //} + throw std::runtime_error("[ContextZstdSeekable::fopen] not implemented.\n"); } From a3dcaf8acfe0b858716330e06496eee5927b37f4 Mon Sep 17 00:00:00 2001 From: yhoogstrate Date: Wed, 2 Mar 2022 13:59:42 +0100 Subject: [PATCH 33/65] tmp sav --- include/chunked_reader.hpp | 4 ++++ src/chunked_reader.cpp | 26 +++++++++++++------------- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/include/chunked_reader.hpp b/include/chunked_reader.hpp index 0890c611..24021ad6 100644 --- a/include/chunked_reader.hpp +++ b/include/chunked_reader.hpp @@ -116,6 +116,10 @@ class ContextZstdSeekable : public State { private: ZSTD_seekable_decompress_init_data* fh = nullptr; + + size_t const buffOutSize = ZSTD_DStreamOutSize(); + char* const buffOut = (char*) malloc_orDie(buffOutSize); + ZSTD_seekable* const seekable = ZSTD_seekable_create(); //@todo -> in constructor, check if not NULL public: void fopen(off_t) override; diff --git a/src/chunked_reader.cpp b/src/chunked_reader.cpp index 52a5e528..8d43ff22 100644 --- a/src/chunked_reader.cpp +++ 
b/src/chunked_reader.cpp @@ -359,7 +359,7 @@ void ContextUncompressed::fopen(off_t start_pos = 0) this->fh->open(this->context->get_filename().c_str(), std::ios::in | std::ios::binary | std::ios::ate); - if(this->fh->is_open()) + if(this->fh->is_open()) // @todo move to top-level fopen() { this->seek(start_pos); } @@ -465,21 +465,21 @@ void ContextZstdSeekable::fopen(off_t start_pos) { throw std::runtime_error("[ContextZstdSeekable::fopen] opening a non closed reader.\n"); } + + + this->fh = ZSTD_seekable_decompressFile_init(this->context->get_filename().c_str()); - //this->fh = new std::ifstream; - //this->fh->open(this->context->get_filename().c_str(), std::ios::in | std::ios::binary | std::ios::ate); + if(fh->fin == NULL | feof(fh->fin)) + { + throw std::runtime_error("[ContextZstdSeekable::fopen] not implemented.\n"); + } + else + { + fseek_orDie(fh->fin, 0, SEEK_SET);// set initial file handle to 0? + // this->fh->seekg(start_pos, std::ios::beg); - //if(this->fh->is_open()) - //{ - // this->fh->seekg(start_pos, std::ios::beg); - //} - //else - //{ - // throw std::runtime_error("[chunked_reader::init] Cannot open file for reading.\n"); - //} - - throw std::runtime_error("[ContextZstdSeekable::fopen] not implemented.\n"); + } } size_t ContextZstdSeekable::read(char *arg_buffer_to, size_t arg_buffer_to_size, From df1449142465596d5c0568920577684dce963d51 Mon Sep 17 00:00:00 2001 From: yhoogstrate Date: Wed, 2 Mar 2022 20:07:34 +0100 Subject: [PATCH 34/65] more init --- src/chunked_reader.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/chunked_reader.cpp b/src/chunked_reader.cpp index 8d43ff22..28c7a4b0 100644 --- a/src/chunked_reader.cpp +++ b/src/chunked_reader.cpp @@ -282,7 +282,6 @@ char * Context::get_buffer() size_t Context::cache_buffer() { size_t s = this->state->cache_buffer(); - printf(" - gelezen bytes?\n",s); this->buffer_n = s; this->buffer_i = 0; @@ -304,9 +303,7 @@ size_t Context::read(char *arg_buffer, size_t 
arg_buffer_size) void Context::TransitionTo(State *arg_state) { - std::cout << "Context: Transition to " << typeid(*arg_state).name() << ".\n"; - - if (this->state != nullptr) + if(this->state != nullptr) { delete this->state; // delete and destruct previous state, incl file points, should also run fh.close(); etc. } @@ -470,7 +467,7 @@ void ContextZstdSeekable::fopen(off_t start_pos) this->fh = ZSTD_seekable_decompressFile_init(this->context->get_filename().c_str()); - if(fh->fin == NULL | feof(fh->fin)) + if((fh->fin == NULL) | feof(fh->fin)) { throw std::runtime_error("[ContextZstdSeekable::fopen] not implemented.\n"); } @@ -479,6 +476,11 @@ void ContextZstdSeekable::fopen(off_t start_pos) fseek_orDie(fh->fin, 0, SEEK_SET);// set initial file handle to 0? // this->fh->seekg(start_pos, std::ios::beg); + size_t const initResult = ZSTD_seekable_initFile(this->seekable, fh->fin); + if (ZSTD_isError(initResult)) { fprintf(stderr, "ZSTD_seekable_init() error : %s \n", ZSTD_getErrorName(initResult)); exit(11); } + + //@todo class member? 
+ size_t maxFileSize = ZSTD_seekable_getFileDecompressedSize(this->seekable); } } From eb2ff8ecd2ef98ab204eaf7449d7a59c7dcc956b Mon Sep 17 00:00:00 2001 From: yhoogstrate Date: Wed, 2 Mar 2022 20:14:49 +0100 Subject: [PATCH 35/65] test --- include/chunked_reader.hpp | 2 ++ src/chunked_reader.cpp | 20 +++++++++++++++++++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/include/chunked_reader.hpp b/include/chunked_reader.hpp index 24021ad6..43b706a1 100644 --- a/include/chunked_reader.hpp +++ b/include/chunked_reader.hpp @@ -121,6 +121,8 @@ class ContextZstdSeekable : public State char* const buffOut = (char*) malloc_orDie(buffOutSize); ZSTD_seekable* const seekable = ZSTD_seekable_create(); //@todo -> in constructor, check if not NULL + size_t maxFileSize; + public: void fopen(off_t) override; size_t cache_buffer() override; diff --git a/src/chunked_reader.cpp b/src/chunked_reader.cpp index 28c7a4b0..227921e4 100644 --- a/src/chunked_reader.cpp +++ b/src/chunked_reader.cpp @@ -480,13 +480,31 @@ void ContextZstdSeekable::fopen(off_t start_pos) if (ZSTD_isError(initResult)) { fprintf(stderr, "ZSTD_seekable_init() error : %s \n", ZSTD_getErrorName(initResult)); exit(11); } //@todo class member? 
- size_t maxFileSize = ZSTD_seekable_getFileDecompressedSize(this->seekable); + this->maxFileSize = ZSTD_seekable_getFileDecompressedSize(this->seekable); } } size_t ContextZstdSeekable::read(char *arg_buffer_to, size_t arg_buffer_to_size, size_t &buffer_i, size_t &buffer_n) { + +/* + while (startOffset < endOffset) { + size_t const result = ZSTD_seekable_decompress(seekable, buffOut, MIN(endOffset - startOffset, buffOutSize), startOffset); + if (!result) { + break; + } + + if (ZSTD_isError(result)) { + fprintf(stderr, "ZSTD_seekable_decompress() error : %s \n", + ZSTD_getErrorName(result)); + exit(12); + } + fwrite_orDie(buffOut, result, fout); + startOffset += result; + } + */ + throw std::runtime_error("[ContextZstdSeekable::read] not implemented.\n"); return 0; From 82061598b66e3acee054f76fda6ac63833c32ba6 Mon Sep 17 00:00:00 2001 From: yhoogstrate Date: Sat, 5 Mar 2022 14:11:21 +0100 Subject: [PATCH 36/65] first level of testing --- CMakeLists.txt | 2 +- test/chunked_reader/test_chunked_reader.cpp | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5b1587cf..45553c26 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -112,7 +112,7 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread") # -DXXH_NAMESPACE=ZST_ if(DEBUG) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -pg -ggdb -Wconversion -D_FILE_OFFSET_BITS=64")# -Werror makes compilation crash when warnings are given (also part of Travis) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -pg -ggdb -Wconversion -D_FILE_OFFSET_BITS=64 -g")# -Werror makes compilation crash when warnings are given (also part of Travis) else() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -D_FILE_OFFSET_BITS=64") endif() diff --git a/test/chunked_reader/test_chunked_reader.cpp b/test/chunked_reader/test_chunked_reader.cpp index 7ce2f312..e4a3e482 100644 --- a/test/chunked_reader/test_chunked_reader.cpp +++ b/test/chunked_reader/test_chunked_reader.cpp @@ -56,12 +56,21 @@ 
BOOST_AUTO_TEST_CASE(test_chunked_reader__small_file) { chunked_reader r_flat = chunked_reader(fastafs_file.c_str()); + Context c(fastafs_file.c_str()); + c.fopen(0); + written = r_flat.read(buffer, 1024); BOOST_CHECK_EQUAL(written, 403); std_buffer = std::string(buffer, written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference1), 0, "Difference in content"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + written = c.read(buffer, 1024); + BOOST_CHECK_EQUAL(written, 403); + std_buffer = std::string(buffer, written); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference1), 0, "Difference in content"); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + written = r_flat.read(buffer, 1024); BOOST_CHECK_EQUAL(written, 0); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); From c9be08b682bd6c31904e6d0ec8cd2efc672f4c66 Mon Sep 17 00:00:00 2001 From: yhoogstrate Date: Sat, 5 Mar 2022 14:52:26 +0100 Subject: [PATCH 37/65] more sophisticated testing --- include/chunked_reader.hpp | 6 ++++-- src/chunked_reader.cpp | 12 ++++++++++++ test/chunked_reader/test_chunked_reader.cpp | 18 +++++++++++++----- 3 files changed, 29 insertions(+), 7 deletions(-) diff --git a/include/chunked_reader.hpp b/include/chunked_reader.hpp index 43b706a1..74c930da 100644 --- a/include/chunked_reader.hpp +++ b/include/chunked_reader.hpp @@ -93,8 +93,7 @@ class State virtual size_t cache_buffer() = 0; // formerly update_..._buffer virtual size_t read(char *, size_t, size_t &, size_t &) = 0; virtual void seek(off_t) = 0; - -}; // comrpession type +}; // compression type @@ -109,6 +108,7 @@ class ContextUncompressed : public State size_t read(char *, size_t, size_t &, size_t &) override; void seek(off_t); + ContextUncompressed(); ~ContextUncompressed() override; }; @@ -152,6 +152,8 @@ class Context // master chunked_reader Context(const char *) ; State* find_state(); + const std::type_info& typeid_state(); + const std::string& get_filename(); char* get_buffer(); diff --git 
a/src/chunked_reader.cpp b/src/chunked_reader.cpp index 227921e4..67fa2e69 100644 --- a/src/chunked_reader.cpp +++ b/src/chunked_reader.cpp @@ -303,6 +303,7 @@ size_t Context::read(char *arg_buffer, size_t arg_buffer_size) void Context::TransitionTo(State *arg_state) { + if(this->state != nullptr) { delete this->state; // delete and destruct previous state, incl file points, should also run fh.close(); etc. @@ -329,6 +330,11 @@ size_t Context::tell() return this->file_i - this->buffer_n + this->buffer_i; } +const std::type_info& Context::typeid_state() +{ + return typeid(*this->state); // somehow pointer is needed to return ContextSubvariant rather than State +} + @@ -427,6 +433,12 @@ void ContextUncompressed::seek(off_t arg_offset) } +ContextUncompressed::ContextUncompressed() +{ + printf("[INVOKING ContextUncompressed::ContextUncompressed]\n"); + std::cout << "[[ " << typeid(this).name() << " ]]\n"; +} + ContextUncompressed::~ContextUncompressed() { if(this->fh != nullptr) diff --git a/test/chunked_reader/test_chunked_reader.cpp b/test/chunked_reader/test_chunked_reader.cpp index e4a3e482..729a83b2 100644 --- a/test/chunked_reader/test_chunked_reader.cpp +++ b/test/chunked_reader/test_chunked_reader.cpp @@ -58,6 +58,8 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__small_file) chunked_reader r_flat = chunked_reader(fastafs_file.c_str()); Context c(fastafs_file.c_str()); c.fopen(0); + BOOST_CHECK(c.typeid_state() == typeid(ContextUncompressed)); + BOOST_CHECK(c.typeid_state() != typeid(ContextZstdSeekable)); written = r_flat.read(buffer, 1024); BOOST_CHECK_EQUAL(written, 403); @@ -65,11 +67,17 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__small_file) BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference1), 0, "Difference in content"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - written = c.read(buffer, 1024); - BOOST_CHECK_EQUAL(written, 403); - std_buffer = std::string(buffer, written); - BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference1), 0, 
"Difference in content"); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + + // Context equivalent + { + written = c.read(buffer, 1024); + BOOST_CHECK_EQUAL(written, 403); + std_buffer = std::string(buffer, written); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference1), 0, "Difference in content"); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + BOOST_CHECK(c.typeid_state() == typeid(ContextUncompressed)); + BOOST_CHECK(c.typeid_state() != typeid(ContextZstdSeekable)); + } written = r_flat.read(buffer, 1024); BOOST_CHECK_EQUAL(written, 0); From 566eeb73d1730f2ef76b0d9cfcc043f94f4b160a Mon Sep 17 00:00:00 2001 From: yhoogstrate Date: Sun, 6 Mar 2022 19:31:59 +0100 Subject: [PATCH 38/65] more testing --- src/chunked_reader.cpp | 36 +++++++++++++++++++-- test/chunked_reader/test_chunked_reader.cpp | 35 +++++++++++++++++++- 2 files changed, 67 insertions(+), 4 deletions(-) diff --git a/src/chunked_reader.cpp b/src/chunked_reader.cpp index 67fa2e69..8e36a640 100644 --- a/src/chunked_reader.cpp +++ b/src/chunked_reader.cpp @@ -325,6 +325,8 @@ void Context::seek(off_t arg_offset) this->state->seek(arg_offset); } + +// positio in the (decompressed) file size_t Context::tell() { return this->file_i - this->buffer_n + this->buffer_i; @@ -499,6 +501,36 @@ void ContextZstdSeekable::fopen(off_t start_pos) size_t ContextZstdSeekable::read(char *arg_buffer_to, size_t arg_buffer_to_size, size_t &buffer_i, size_t &buffer_n) { + size_t written = 0; + + + + + + + size_t endOffset = std::min( (size_t) buffer_n, (size_t) READ_BUFFER_SIZE); + size_t startOffset = buffer_i; + + size_t buffer_out_i = 0; + while (startOffset < endOffset) { + size_t const result = ZSTD_seekable_decompress(seekable, this->context->get_buffer(), std::min((size_t) endOffset - startOffset, buffOutSize), (size_t) startOffset); + + if (ZSTD_isError(result)) { + fprintf(stderr, "ZSTD_seekable_decompress() error : %s \n", + ZSTD_getErrorName(result)); + exit(12); + } + + /*for(size_t i = 0; i < 
result; i++) { + this->buffer[buffer_out_i] = arg_buffer_to[i]; + buffer_out_i++; + }*/ + + startOffset += result; + written += result; + } + + /* while (startOffset < endOffset) { @@ -517,9 +549,7 @@ size_t ContextZstdSeekable::read(char *arg_buffer_to, size_t arg_buffer_to_size, } */ - throw std::runtime_error("[ContextZstdSeekable::read] not implemented.\n"); - - return 0; + return written; } void ContextZstdSeekable::seek(off_t arg_offset) diff --git a/test/chunked_reader/test_chunked_reader.cpp b/test/chunked_reader/test_chunked_reader.cpp index 729a83b2..14d9e334 100644 --- a/test/chunked_reader/test_chunked_reader.cpp +++ b/test/chunked_reader/test_chunked_reader.cpp @@ -61,32 +61,65 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__small_file) BOOST_CHECK(c.typeid_state() == typeid(ContextUncompressed)); BOOST_CHECK(c.typeid_state() != typeid(ContextZstdSeekable)); + BOOST_CHECK_EQUAL(r_flat.tell(), 0); written = r_flat.read(buffer, 1024); BOOST_CHECK_EQUAL(written, 403); std_buffer = std::string(buffer, written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference1), 0, "Difference in content"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + BOOST_CHECK_EQUAL(r_flat.tell(), 403); // Context equivalent { + BOOST_CHECK_EQUAL(c.tell(), 0); written = c.read(buffer, 1024); BOOST_CHECK_EQUAL(written, 403); std_buffer = std::string(buffer, written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference1), 0, "Difference in content"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + BOOST_CHECK_EQUAL(c.tell(), 403); + BOOST_CHECK(c.typeid_state() == typeid(ContextUncompressed)); BOOST_CHECK(c.typeid_state() != typeid(ContextZstdSeekable)); + } + // test what happens when file is closed (twice) written = r_flat.read(buffer, 1024); BOOST_CHECK_EQUAL(written, 0); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + BOOST_CHECK_EQUAL(c.tell(), 403); - // test what happens when file is closed written = r_flat.read(buffer, 1024); BOOST_CHECK_EQUAL(written, 0); 
flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + BOOST_CHECK_EQUAL(c.tell(), 403); + + + // Context equivalent + { + written = c.read(buffer, 1024); + BOOST_CHECK_EQUAL(written, 0); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + BOOST_CHECK_EQUAL(c.tell(), 403); + + written = c.read(buffer, 1024); + BOOST_CHECK_EQUAL(written, 0); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + BOOST_CHECK_EQUAL(c.tell(), 403); + } + + + + + + // Context equivalent + { + written = c.read(buffer, 1024); + BOOST_CHECK_EQUAL(written, 0); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } // test seek stuff From 8f6dea8f5e6c8155f0b23ab72701d59e268d7093 Mon Sep 17 00:00:00 2001 From: yhoogstrate Date: Sun, 6 Mar 2022 20:24:21 +0100 Subject: [PATCH 39/65] small patches to slight mess --- include/chunked_reader.hpp | 1 - src/chunked_reader.cpp | 23 ++++++--- test.txt | 1 - test/chunked_reader/test_chunked_reader.cpp | 54 +++++++++++++++------ 4 files changed, 56 insertions(+), 23 deletions(-) delete mode 100644 test.txt diff --git a/include/chunked_reader.hpp b/include/chunked_reader.hpp index 74c930da..3717d800 100644 --- a/include/chunked_reader.hpp +++ b/include/chunked_reader.hpp @@ -108,7 +108,6 @@ class ContextUncompressed : public State size_t read(char *, size_t, size_t &, size_t &) override; void seek(off_t); - ContextUncompressed(); ~ContextUncompressed() override; }; diff --git a/src/chunked_reader.cpp b/src/chunked_reader.cpp index 8e36a640..63fbcb80 100644 --- a/src/chunked_reader.cpp +++ b/src/chunked_reader.cpp @@ -249,6 +249,9 @@ void chunked_reader::seek(off_t offset) size_t chunked_reader::tell() { + //@todo decide what to return when out of bound + //e.g. 
when exceeding file size + return this->file_i - this->buffer_n + this->buffer_i; } @@ -322,7 +325,9 @@ void Context::fopen(off_t file_offset) void Context::seek(off_t arg_offset) { - this->state->seek(arg_offset); + this->file_i = arg_offset; // @todo obtain return value from this->state->seek() and limit this + this->state->seek(arg_offset);// set file pointer + this->cache_buffer();// update internal buffer } @@ -376,6 +381,14 @@ void ContextUncompressed::fopen(off_t start_pos = 0) size_t ContextUncompressed::cache_buffer() { + if(!this->fh->is_open()) + { + throw std::runtime_error("[ContextUncompressed::seek] this seek killed the filehandle.\n"); + } + printf("AA cache_buffer at: %i\n",this->fh->tellg()); + this->fh->seekg(0, std::ios::beg); + printf("BB cache_buffer at: %i\n",this->fh->tellg()); + this->fh->read(this->context->get_buffer(), READ_BUFFER_SIZE); if(!this->fh) @@ -384,6 +397,7 @@ size_t ContextUncompressed::cache_buffer() } size_t s = (size_t) this->fh->gcount(); + printf("read: %i\n",s); return s; } @@ -401,7 +415,7 @@ size_t ContextUncompressed::read(char *arg_buffer_to, size_t arg_buffer_to_size, size_t written = 0; const size_t n1 = std::min(buffer_n - buffer_i, arg_buffer_to_size);// number of characters to copy - //printf("buffer_n = %i, buffer_i = %i, arg_buffer_to_size = %i, n1 = %i, n2 = %i READ_BUFFER_SIZE=%i\n",buffer_n, buffer_i, arg_buffer_to_size, n1, n2, READ_BUFFER_SIZE); + printf("buffer_n = %i, buffer_i = %i, arg_buffer_to_size = %i, n1 = %i, READ_BUFFER_SIZE=%i\n",buffer_n, buffer_i, arg_buffer_to_size, n1, (int) READ_BUFFER_SIZE); // copy current internal buffer completely while(written < n1) @@ -435,11 +449,6 @@ void ContextUncompressed::seek(off_t arg_offset) } -ContextUncompressed::ContextUncompressed() -{ - printf("[INVOKING ContextUncompressed::ContextUncompressed]\n"); - std::cout << "[[ " << typeid(this).name() << " ]]\n"; -} ContextUncompressed::~ContextUncompressed() { diff --git a/test.txt b/test.txt deleted file 
mode 100644 index 49ff60a6..00000000 --- a/test.txt +++ /dev/null @@ -1 +0,0 @@ -11111111112222222222333333333344444444445555555555666666666677777777778888888888999999999900000000001111111111222222222233333333334444444444555555xyz diff --git a/test/chunked_reader/test_chunked_reader.cpp b/test/chunked_reader/test_chunked_reader.cpp index 14d9e334..3691fea8 100644 --- a/test/chunked_reader/test_chunked_reader.cpp +++ b/test/chunked_reader/test_chunked_reader.cpp @@ -111,21 +111,36 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__small_file) } - - - + // test seek stuff + BOOST_CHECK_EQUAL(r_flat.tell(), 403); + r_flat.seek(0); + BOOST_CHECK_EQUAL(r_flat.tell(), 0); + r_flat.seek(1); + BOOST_CHECK_EQUAL(r_flat.tell(), 1); + r_flat.seek(402); + BOOST_CHECK_EQUAL(r_flat.tell(), 402); + // when out of 'bound' return -1 + // type should be 'streampos' + //r_flat.seek(1337); + //BOOST_CHECK_EQUAL(r_flat.tell(), 403); + + // Context equivalent { - written = c.read(buffer, 1024); - BOOST_CHECK_EQUAL(written, 0); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - } + BOOST_CHECK_EQUAL(c.tell(), 403); + c.seek(0); + BOOST_CHECK_EQUAL(c.tell(), 0); + c.seek(1); + BOOST_CHECK_EQUAL(c.tell(), 1); + c.seek(402); + BOOST_CHECK_EQUAL(c.tell(), 402); + //r_flat.seek(1337); + //BOOST_CHECK_EQUAL(r_flat.tell(), 403 | 402); + } - // test seek stuff - r_flat.seek(0); // reset to first pos in file - BOOST_CHECK_EQUAL(r_flat.tell(), 0); + r_flat.seek(0); written = r_flat.read(buffer, 4); BOOST_CHECK_EQUAL(written, 4); BOOST_CHECK_EQUAL(r_flat.tell(), 4); @@ -134,6 +149,20 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__small_file) flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + // Context equivalent + { + c.seek(0); + BOOST_CHECK_EQUAL(c.tell(), 0); + written = c.read(buffer, 4); + BOOST_CHECK_EQUAL(written, 4); + + BOOST_CHECK_EQUAL(c.tell(), 4); + std_buffer = std::string(buffer, written); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference2), 0, "Difference in content"); + 
flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } + + r_flat.seek(1); // reset to first pos in file BOOST_CHECK_EQUAL(r_flat.tell(), 1); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); @@ -383,10 +412,7 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__new_style) { ////chunked_reader r_flat = chunked_reader(fastafs_file.c_str()); - std::string tf = "test.txt"; - - //Context c = Context(fastafs_file.c_str()); - Context c = Context(tf.c_str()); + Context c = Context(fasta_file.c_str()); c.fopen(0); // open file handle and load buffer written = c.read(buffer, 10); From e93df2f222c4af96a732bae46b371b2a0e0fffce Mon Sep 17 00:00:00 2001 From: yhoogstrate Date: Thu, 17 Mar 2022 15:09:40 +0100 Subject: [PATCH 40/65] test --- CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 45553c26..1c516b00 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,6 +5,7 @@ cmake_minimum_required(VERSION 2.8) project(fastafs) +# helps debugging: # Do this once in a while - find different compiler warnings #set(CMAKE_CXX_COMPILER "clang++") From c48cd259573897b329d772ee24f72ed8e0d74298 Mon Sep 17 00:00:00 2001 From: youri Date: Sun, 27 Mar 2022 12:19:15 +0200 Subject: [PATCH 41/65] consider behaviour of reader after EOF --- CMakeLists.txt | 2 +- src/chunked_reader.cpp | 26 ++++++++++++++++++++------ 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 45553c26..69377470 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -112,7 +112,7 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread") # -DXXH_NAMESPACE=ZST_ if(DEBUG) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -pg -ggdb -Wconversion -D_FILE_OFFSET_BITS=64 -g")# -Werror makes compilation crash when warnings are given (also part of Travis) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -pg -ggdb -Wconversion -D_FILE_OFFSET_BITS=64 -g -DBOOST_TEST_TOOLS_UNDER_DEBUGGER -DBOOST_TEST_TOOLS_DEBUGGABLE")# -Werror makes compilation crash when 
warnings are given (also part of Travis) else() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -D_FILE_OFFSET_BITS=64") endif() diff --git a/src/chunked_reader.cpp b/src/chunked_reader.cpp index 63fbcb80..b3843cba 100644 --- a/src/chunked_reader.cpp +++ b/src/chunked_reader.cpp @@ -381,19 +381,33 @@ void ContextUncompressed::fopen(off_t start_pos = 0) size_t ContextUncompressed::cache_buffer() { - if(!this->fh->is_open()) + if(!this->fh->good()) { - throw std::runtime_error("[ContextUncompressed::seek] this seek killed the filehandle.\n"); + std::cout << "pre read(): \n"; + std::cout << " good()=" << this->fh->good() << "\n"; + std::cout << " eof()=" << this->fh->eof() << "\n"; + std::cout << " fail()=" << this->fh->fail() << "\n"; + std::cout << " bad()=" << this->fh->bad() << "\n\n"; + //throw std::runtime_error("[ContextUncompressed::cache_buffer] fh is not 'good'. \n"); } + printf("AA cache_buffer at: %i\n",this->fh->tellg()); - this->fh->seekg(0, std::ios::beg); - printf("BB cache_buffer at: %i\n",this->fh->tellg()); + if(this->fh->tellg() == -1) + { + this->fh->seekg(0, std::ios::beg); + printf("BB cache_buffer at: %i [after flushing to 0?]\n",this->fh->tellg()); + } this->fh->read(this->context->get_buffer(), READ_BUFFER_SIZE); - if(!this->fh) + if(!this->fh->good()) { - throw std::runtime_error("[ContextUncompressed::cache_buffer] Coult not open file. \n"); + std::cout << "post read(): \n"; + std::cout << " good()=" << this->fh->good() << "\n"; + std::cout << " eof()=" << this->fh->eof() << "\n"; + std::cout << " fail()=" << this->fh->fail() << "\n"; + std::cout << " bad()=" << this->fh->bad() << "\n\n"; + //throw std::runtime_error("[ContextUncompressed::cache_buffer] fh is not 'good'. 
\n"); } size_t s = (size_t) this->fh->gcount(); From 6276fa3626c070804d4fa15303efa258672d5b80 Mon Sep 17 00:00:00 2001 From: youri Date: Sun, 27 Mar 2022 12:26:37 +0200 Subject: [PATCH 42/65] seems better --- src/chunked_reader.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/chunked_reader.cpp b/src/chunked_reader.cpp index b3843cba..f8c83bdd 100644 --- a/src/chunked_reader.cpp +++ b/src/chunked_reader.cpp @@ -408,11 +408,17 @@ size_t ContextUncompressed::cache_buffer() std::cout << " fail()=" << this->fh->fail() << "\n"; std::cout << " bad()=" << this->fh->bad() << "\n\n"; //throw std::runtime_error("[ContextUncompressed::cache_buffer] fh is not 'good'. \n"); + } size_t s = (size_t) this->fh->gcount(); printf("read: %i\n",s); + if(this->fh->eof()) { + this->fh->clear(); + this->fh->seekg(0, std::ios::end); + } + return s; } From b523e705ebc42ce0c408a96ffbe35ae05e558a68 Mon Sep 17 00:00:00 2001 From: yhoogstrate Date: Sun, 3 Apr 2022 12:48:04 +0200 Subject: [PATCH 43/65] sav --- src/chunked_reader.cpp | 28 +---- test/chunked_reader/test_chunked_reader.cpp | 107 ++++++++++++-------- 2 files changed, 67 insertions(+), 68 deletions(-) diff --git a/src/chunked_reader.cpp b/src/chunked_reader.cpp index f8c83bdd..a0b59237 100644 --- a/src/chunked_reader.cpp +++ b/src/chunked_reader.cpp @@ -381,38 +381,16 @@ void ContextUncompressed::fopen(off_t start_pos = 0) size_t ContextUncompressed::cache_buffer() { - if(!this->fh->good()) - { - std::cout << "pre read(): \n"; - std::cout << " good()=" << this->fh->good() << "\n"; - std::cout << " eof()=" << this->fh->eof() << "\n"; - std::cout << " fail()=" << this->fh->fail() << "\n"; - std::cout << " bad()=" << this->fh->bad() << "\n\n"; - //throw std::runtime_error("[ContextUncompressed::cache_buffer] fh is not 'good'. 
\n"); - } - - printf("AA cache_buffer at: %i\n",this->fh->tellg()); +#if DEBUG if(this->fh->tellg() == -1) { - this->fh->seekg(0, std::ios::beg); - printf("BB cache_buffer at: %i [after flushing to 0?]\n",this->fh->tellg()); + throw std::runtime_error("ContextUncompressed::cache_buffer\n"); } +#endif //DEBUG this->fh->read(this->context->get_buffer(), READ_BUFFER_SIZE); - if(!this->fh->good()) - { - std::cout << "post read(): \n"; - std::cout << " good()=" << this->fh->good() << "\n"; - std::cout << " eof()=" << this->fh->eof() << "\n"; - std::cout << " fail()=" << this->fh->fail() << "\n"; - std::cout << " bad()=" << this->fh->bad() << "\n\n"; - //throw std::runtime_error("[ContextUncompressed::cache_buffer] fh is not 'good'. \n"); - - } - size_t s = (size_t) this->fh->gcount(); - printf("read: %i\n",s); if(this->fh->eof()) { this->fh->clear(); diff --git a/test/chunked_reader/test_chunked_reader.cpp b/test/chunked_reader/test_chunked_reader.cpp index 3691fea8..9db448d8 100644 --- a/test/chunked_reader/test_chunked_reader.cpp +++ b/test/chunked_reader/test_chunked_reader.cpp @@ -55,11 +55,22 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__small_file) std::string reference3 = "\x0a\x46\x53\x00"s; { + // old init chunked_reader r_flat = chunked_reader(fastafs_file.c_str()); - Context c(fastafs_file.c_str()); - c.fopen(0); - BOOST_CHECK(c.typeid_state() == typeid(ContextUncompressed)); - BOOST_CHECK(c.typeid_state() != typeid(ContextZstdSeekable)); + + // Context equivalent - uncompressed + Context c1(fastafs_file.c_str()); + c1.fopen(0); + BOOST_CHECK(c1.typeid_state() == typeid(ContextUncompressed)); + BOOST_CHECK(c1.typeid_state() != typeid(ContextZstdSeekable)); + + // Context equivalent - compressed + //Context c2(fastafs_file_zstd.c_str()); + //c2.fopen(0); + //BOOST_CHECK(c2.typeid_state() == typeid(ContextZstdSeekable)); + //BOOST_CHECK(c2.typeid_state() != typeid(ContextUncompressed)); + + BOOST_CHECK_EQUAL(r_flat.tell(), 0); written = r_flat.read(buffer, 
1024); @@ -69,45 +80,43 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__small_file) flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); BOOST_CHECK_EQUAL(r_flat.tell(), 403); - - // Context equivalent + // Context equivalent - uncompressed { - BOOST_CHECK_EQUAL(c.tell(), 0); - written = c.read(buffer, 1024); + BOOST_CHECK_EQUAL(c1.tell(), 0); + written = c1.read(buffer, 1024); BOOST_CHECK_EQUAL(written, 403); std_buffer = std::string(buffer, written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference1), 0, "Difference in content"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - BOOST_CHECK_EQUAL(c.tell(), 403); + BOOST_CHECK_EQUAL(c1.tell(), 403); - BOOST_CHECK(c.typeid_state() == typeid(ContextUncompressed)); - BOOST_CHECK(c.typeid_state() != typeid(ContextZstdSeekable)); - + BOOST_CHECK(c1.typeid_state() == typeid(ContextUncompressed)); + BOOST_CHECK(c1.typeid_state() != typeid(ContextZstdSeekable)); } + // test what happens when file is closed (twice) written = r_flat.read(buffer, 1024); BOOST_CHECK_EQUAL(written, 0); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - BOOST_CHECK_EQUAL(c.tell(), 403); + BOOST_CHECK_EQUAL(r_flat.tell(), 403); written = r_flat.read(buffer, 1024); BOOST_CHECK_EQUAL(written, 0); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - BOOST_CHECK_EQUAL(c.tell(), 403); - + BOOST_CHECK_EQUAL(r_flat.tell(), 403); - // Context equivalent + // Context equivalent - uncompressed { - written = c.read(buffer, 1024); + written = c1.read(buffer, 1024); BOOST_CHECK_EQUAL(written, 0); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - BOOST_CHECK_EQUAL(c.tell(), 403); + BOOST_CHECK_EQUAL(c1.tell(), 403); - written = c.read(buffer, 1024); + written = c1.read(buffer, 1024); BOOST_CHECK_EQUAL(written, 0); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - BOOST_CHECK_EQUAL(c.tell(), 403); + BOOST_CHECK_EQUAL(c1.tell(), 403); } @@ -119,24 +128,16 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__small_file) BOOST_CHECK_EQUAL(r_flat.tell(), 1); 
r_flat.seek(402); BOOST_CHECK_EQUAL(r_flat.tell(), 402); - // when out of 'bound' return -1 - // type should be 'streampos' - //r_flat.seek(1337); - //BOOST_CHECK_EQUAL(r_flat.tell(), 403); - - - // Context equivalent + + // Context equivalent - uncompressed { - BOOST_CHECK_EQUAL(c.tell(), 403); - c.seek(0); - BOOST_CHECK_EQUAL(c.tell(), 0); - c.seek(1); - BOOST_CHECK_EQUAL(c.tell(), 1); - c.seek(402); - BOOST_CHECK_EQUAL(c.tell(), 402); - - //r_flat.seek(1337); - //BOOST_CHECK_EQUAL(r_flat.tell(), 403 | 402); + BOOST_CHECK_EQUAL(c1.tell(), 403); + c1.seek(0); + BOOST_CHECK_EQUAL(c1.tell(), 0); + c1.seek(1); + BOOST_CHECK_EQUAL(c1.tell(), 1); + c1.seek(402); + BOOST_CHECK_EQUAL(c1.tell(), 402); } @@ -149,14 +150,14 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__small_file) flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - // Context equivalent + // Context equivalent - uncompressed { - c.seek(0); - BOOST_CHECK_EQUAL(c.tell(), 0); - written = c.read(buffer, 4); + c1.seek(0); + BOOST_CHECK_EQUAL(c1.tell(), 0); + written = c1.read(buffer, 4); BOOST_CHECK_EQUAL(written, 4); - BOOST_CHECK_EQUAL(c.tell(), 4); + BOOST_CHECK_EQUAL(c1.tell(), 4); std_buffer = std::string(buffer, written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference2), 0, "Difference in content"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); @@ -166,13 +167,33 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__small_file) r_flat.seek(1); // reset to first pos in file BOOST_CHECK_EQUAL(r_flat.tell(), 1); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - written = r_flat.read(buffer, 4); BOOST_CHECK_EQUAL(written, 4); BOOST_CHECK_EQUAL(r_flat.tell(), 5); std_buffer = std::string(buffer, written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference3), 0, "Difference in content"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + + // Context equivalent - uncompressed + { + c1.seek(1); + BOOST_CHECK_EQUAL(c1.tell(), 1); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + written = 
c1.read(buffer, 4); + BOOST_CHECK_EQUAL(written, 4); + BOOST_CHECK_EQUAL(c1.tell(), 5); + std_buffer = std::string(buffer, written); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference3), 0, "Difference in content"); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } + + + r_flat.seek(1024*1024); // trigger out of bound + + // Context equivalent - uncompressed + { + c1.seek(1024*1024); + } } { From bb01a799a6c8c6cb408888ff286b30d2789a2ffc Mon Sep 17 00:00:00 2001 From: yhoogstrate Date: Sun, 3 Apr 2022 13:27:42 +0200 Subject: [PATCH 44/65] seekable stuff --- .../zstd-seekable-adapted/zstdseek_utils.cpp | 2 - src/chunked_reader.cpp | 47 +++++++++++++++---- test/chunked_reader/test_chunked_reader.cpp | 10 ++-- 3 files changed, 45 insertions(+), 14 deletions(-) diff --git a/dependencies/zstd-seekable-adapted/zstdseek_utils.cpp b/dependencies/zstd-seekable-adapted/zstdseek_utils.cpp index 750bd775..47e61565 100644 --- a/dependencies/zstd-seekable-adapted/zstdseek_utils.cpp +++ b/dependencies/zstd-seekable-adapted/zstdseek_utils.cpp @@ -175,7 +175,6 @@ size_t ZSTD_seekable_decompressFile_orDie(ZSTD_seekable_decompress_init_data* fh size_t written = 0; if(fh->fin == NULL) { - printf("fin == NULL: YES!!\n"); exit(124); } //else { @@ -183,7 +182,6 @@ size_t ZSTD_seekable_decompressFile_orDie(ZSTD_seekable_decompress_init_data* fh //} if (feof(fh->fin)) { - printf ("!!!! FEOF !!!!! 
\n"); exit(123); } //else { diff --git a/src/chunked_reader.cpp b/src/chunked_reader.cpp index a0b59237..954fa211 100644 --- a/src/chunked_reader.cpp +++ b/src/chunked_reader.cpp @@ -267,8 +267,6 @@ void State::set_context(Context *arg_context) Context::Context(const char * arg_filename) : filename(arg_filename), buffer("\0"), buffer_i(0), buffer_n(0), file_i(0), state(nullptr) { - printf("Constructor alive\n"); - this->TransitionTo(this->find_state()); } @@ -345,7 +343,7 @@ const std::type_info& Context::typeid_state() -State * Context::find_state() +State *Context::find_state() { if(is_zstd_file(this->filename.c_str())) { @@ -400,6 +398,8 @@ size_t ContextUncompressed::cache_buffer() return s; } + +// This does not read the actual flat file, this copies its internal buffer to arg_buffer_to size_t ContextUncompressed::read(char *arg_buffer_to, size_t arg_buffer_to_size, size_t &buffer_i, size_t &buffer_n) { @@ -413,8 +413,6 @@ size_t ContextUncompressed::read(char *arg_buffer_to, size_t arg_buffer_to_size, size_t written = 0; const size_t n1 = std::min(buffer_n - buffer_i, arg_buffer_to_size);// number of characters to copy - printf("buffer_n = %i, buffer_i = %i, arg_buffer_to_size = %i, n1 = %i, READ_BUFFER_SIZE=%i\n",buffer_n, buffer_i, arg_buffer_to_size, n1, (int) READ_BUFFER_SIZE); - // copy current internal buffer completely while(written < n1) { @@ -471,14 +469,47 @@ ContextUncompressed::~ContextUncompressed() size_t ContextZstdSeekable::cache_buffer() { - throw std::runtime_error("[ContextZstdSeekable::cache_buffer] not implemented.\n"); + //size_t written = ZSTD_seekable_decompressFile_orDie(this->fh_zstd, this->file_i, this->buffer, this->file_i + READ_BUFFER_SIZE); + //this->fh->read(this->context->get_buffer(), READ_BUFFER_SIZE); + + size_t written = ZSTD_seekable_decompressFile_orDie( + this->fh, + 0, //this->context->file_i, + this->context->get_buffer(), + 0 + READ_BUFFER_SIZE //this->context->file_i + READ_BUFFER_SIZE + ); + + printf("written = 
%i\n", written); + + /* + { + #if DEBUG + if(this->fh->tellg() == -1) + { + throw std::runtime_error("ContextUncompressed::cache_buffer\n"); + } + #endif //DEBUG + + this->fh->read(this->context->get_buffer(), READ_BUFFER_SIZE); + + size_t s = (size_t) this->fh->gcount(); + + if(this->fh->eof()) { + this->fh->clear(); + this->fh->seekg(0, std::ios::end); + } + + return s; + } + */ + + //throw std::runtime_error("[ContextZstdSeekable::cache_buffer] not implemented.\n"); - return 0; + return written; } void ContextZstdSeekable::fopen(off_t start_pos) { - if(this->fh != nullptr) { throw std::runtime_error("[ContextZstdSeekable::fopen] opening a non closed reader.\n"); diff --git a/test/chunked_reader/test_chunked_reader.cpp b/test/chunked_reader/test_chunked_reader.cpp index 9db448d8..f849b225 100644 --- a/test/chunked_reader/test_chunked_reader.cpp +++ b/test/chunked_reader/test_chunked_reader.cpp @@ -65,10 +65,12 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__small_file) BOOST_CHECK(c1.typeid_state() != typeid(ContextZstdSeekable)); // Context equivalent - compressed - //Context c2(fastafs_file_zstd.c_str()); - //c2.fopen(0); - //BOOST_CHECK(c2.typeid_state() == typeid(ContextZstdSeekable)); - //BOOST_CHECK(c2.typeid_state() != typeid(ContextUncompressed)); + printf("checkpoint 1\n"); + Context c2(fastafs_file_zstd.c_str()); + c2.fopen(0); + BOOST_CHECK(c2.typeid_state() == typeid(ContextZstdSeekable)); + BOOST_CHECK(c2.typeid_state() != typeid(ContextUncompressed)); + printf("checkpoint 2\n"); From ab33df1a146c8d177f574c189adbc601e651ce88 Mon Sep 17 00:00:00 2001 From: yhoogstrate Date: Tue, 12 Apr 2022 16:12:17 +0200 Subject: [PATCH 45/65] closing in --- .../zstd-seekable-adapted/zstdseek_utils.cpp | 1 + include/chunked_reader.hpp | 5 +- src/chunked_reader.cpp | 143 +++++++----------- test/chunked_reader/test_chunked_reader.cpp | 25 ++- 4 files changed, 77 insertions(+), 97 deletions(-) diff --git a/dependencies/zstd-seekable-adapted/zstdseek_utils.cpp 
b/dependencies/zstd-seekable-adapted/zstdseek_utils.cpp index 47e61565..d529d609 100644 --- a/dependencies/zstd-seekable-adapted/zstdseek_utils.cpp +++ b/dependencies/zstd-seekable-adapted/zstdseek_utils.cpp @@ -230,6 +230,7 @@ size_t ZSTD_seekable_decompressFile_orDie(ZSTD_seekable_decompress_init_data* fh //fh->fin_locked = false; + return written; } diff --git a/include/chunked_reader.hpp b/include/chunked_reader.hpp index 3717d800..5ca7d13f 100644 --- a/include/chunked_reader.hpp +++ b/include/chunked_reader.hpp @@ -86,12 +86,11 @@ class State public: virtual ~State() {}; void set_context(Context *); - + size_t read(char *, size_t, size_t &, size_t &); // reads from buffer, context a-specific // virtual functions: virtual void fopen(off_t) = 0; virtual size_t cache_buffer() = 0; // formerly update_..._buffer - virtual size_t read(char *, size_t, size_t &, size_t &) = 0; virtual void seek(off_t) = 0; }; // compression type @@ -105,7 +104,6 @@ class ContextUncompressed : public State public: void fopen(off_t) override; size_t cache_buffer() override; - size_t read(char *, size_t, size_t &, size_t &) override; void seek(off_t); ~ContextUncompressed() override; @@ -125,7 +123,6 @@ class ContextZstdSeekable : public State public: void fopen(off_t) override; size_t cache_buffer() override; - size_t read(char *, size_t, size_t &, size_t &) override; void seek(off_t); ~ContextZstdSeekable() override; diff --git a/src/chunked_reader.cpp b/src/chunked_reader.cpp index 954fa211..7337a6aa 100644 --- a/src/chunked_reader.cpp +++ b/src/chunked_reader.cpp @@ -262,6 +262,50 @@ void State::set_context(Context *arg_context) this->context = arg_context; } +// This does not read the actual flat file, this copies its internal buffer to arg_buffer_to +size_t State::read(char *arg_buffer_to, size_t arg_buffer_to_size, + size_t &buffer_i, size_t &buffer_n) +{ +#if DEBUG + if(arg_buffer_to_size > READ_BUFFER_SIZE) + { + throw std::runtime_error("[ContextUncompressed::read] Requested 
buffer size larger than internal context buffer.\n"); + } +#endif //DEBUG + + size_t written = 0; + const size_t n1 = std::min(buffer_n - buffer_i, arg_buffer_to_size);// number of characters to copy + printf("a. buffer_i = %i buffer_n = %i n1 = %i written = %i arg_buffer_to_size = %i\n", buffer_i, buffer_n , n1, written, arg_buffer_to_size); + + // copy current internal buffer completely + while(written < n1) + { + arg_buffer_to[written++] = this->context->get_buffer()[buffer_i++]; + } + + printf("b. buffer_i = %i buffer_n = %i n1 = %i written = %i arg_buffer_to_size = %i\n", buffer_i, buffer_n , n1, written, arg_buffer_to_size); + if(written < arg_buffer_to_size) + { + printf("true\n"); + this->context->cache_buffer();// needs to set n to 0 + + printf("c. buffer_i = %i buffer_n = %i n1 = %i written = %i arg_buffer_to_size = %i\n", buffer_i, buffer_n , n1, written, arg_buffer_to_size); + while(buffer_i < buffer_n and written < arg_buffer_to_size) + { + arg_buffer_to[written++] = this->context->get_buffer()[buffer_i++]; + } + printf("d. buffer_i = %i buffer_n = %i n1 = %i written = %i arg_buffer_to_size = %i\n", buffer_i, buffer_n , n1, written, arg_buffer_to_size); + + + //printf("recursively call another read :: %i\n", n2); + } + printf("e. 
buffer_i = %i buffer_n = %i n1 = %i written = %i arg_buffer_to_size = %i\n", buffer_i, buffer_n , n1, written, arg_buffer_to_size); + + + return written; +} + + @@ -399,40 +443,6 @@ size_t ContextUncompressed::cache_buffer() } -// This does not read the actual flat file, this copies its internal buffer to arg_buffer_to -size_t ContextUncompressed::read(char *arg_buffer_to, size_t arg_buffer_to_size, - size_t &buffer_i, size_t &buffer_n) -{ -#if DEBUG - if(arg_buffer_to_size > READ_BUFFER_SIZE) - { - throw std::runtime_error("[ContextUncompressed::read] Requested buffer size larger than internal context buffer.\n"); - } -#endif //DEBUG - - size_t written = 0; - const size_t n1 = std::min(buffer_n - buffer_i, arg_buffer_to_size);// number of characters to copy - - // copy current internal buffer completely - while(written < n1) - { - arg_buffer_to[written++] = this->context->get_buffer()[buffer_i++]; - } - - if(written < arg_buffer_to_size) - { - this->context->cache_buffer(); - - while(buffer_i < buffer_n and written < arg_buffer_to_size) - { - arg_buffer_to[written++] = this->context->get_buffer()[buffer_i++]; - } - - //printf("recursively call another read :: %i\n", n2); - } - - return written; -} void ContextUncompressed::seek(off_t arg_offset) { @@ -472,14 +482,19 @@ size_t ContextZstdSeekable::cache_buffer() //size_t written = ZSTD_seekable_decompressFile_orDie(this->fh_zstd, this->file_i, this->buffer, this->file_i + READ_BUFFER_SIZE); //this->fh->read(this->context->get_buffer(), READ_BUFFER_SIZE); + // figure out the location in the decompressed file + + printf("%i\n",this->context->tell()); + size_t written = ZSTD_seekable_decompressFile_orDie( this->fh, - 0, //this->context->file_i, + this->context->tell(), //this->context->file_i, this->context->get_buffer(), - 0 + READ_BUFFER_SIZE //this->context->file_i + READ_BUFFER_SIZE + this->context->tell() + READ_BUFFER_SIZE //this->context->file_i + READ_BUFFER_SIZE ); - printf("written = %i\n", written); + 
//printf("written = %i\n", written); + //printf("{{%s}}\n", this->context->get_buffer()); /* { @@ -536,60 +551,6 @@ void ContextZstdSeekable::fopen(off_t start_pos) } } -size_t ContextZstdSeekable::read(char *arg_buffer_to, size_t arg_buffer_to_size, - size_t &buffer_i, size_t &buffer_n) -{ - size_t written = 0; - - - - - - - size_t endOffset = std::min( (size_t) buffer_n, (size_t) READ_BUFFER_SIZE); - size_t startOffset = buffer_i; - - size_t buffer_out_i = 0; - while (startOffset < endOffset) { - size_t const result = ZSTD_seekable_decompress(seekable, this->context->get_buffer(), std::min((size_t) endOffset - startOffset, buffOutSize), (size_t) startOffset); - - if (ZSTD_isError(result)) { - fprintf(stderr, "ZSTD_seekable_decompress() error : %s \n", - ZSTD_getErrorName(result)); - exit(12); - } - - /*for(size_t i = 0; i < result; i++) { - this->buffer[buffer_out_i] = arg_buffer_to[i]; - buffer_out_i++; - }*/ - - startOffset += result; - written += result; - } - - - -/* - while (startOffset < endOffset) { - size_t const result = ZSTD_seekable_decompress(seekable, buffOut, MIN(endOffset - startOffset, buffOutSize), startOffset); - if (!result) { - break; - } - - if (ZSTD_isError(result)) { - fprintf(stderr, "ZSTD_seekable_decompress() error : %s \n", - ZSTD_getErrorName(result)); - exit(12); - } - fwrite_orDie(buffOut, result, fout); - startOffset += result; - } - */ - - return written; -} - void ContextZstdSeekable::seek(off_t arg_offset) { throw std::runtime_error("[ContextZstdSeekable::seek] not implemented.\n"); diff --git a/test/chunked_reader/test_chunked_reader.cpp b/test/chunked_reader/test_chunked_reader.cpp index f849b225..6535308b 100644 --- a/test/chunked_reader/test_chunked_reader.cpp +++ b/test/chunked_reader/test_chunked_reader.cpp @@ -65,12 +65,10 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__small_file) BOOST_CHECK(c1.typeid_state() != typeid(ContextZstdSeekable)); // Context equivalent - compressed - printf("checkpoint 1\n"); Context 
c2(fastafs_file_zstd.c_str()); c2.fopen(0); BOOST_CHECK(c2.typeid_state() == typeid(ContextZstdSeekable)); BOOST_CHECK(c2.typeid_state() != typeid(ContextUncompressed)); - printf("checkpoint 2\n"); @@ -95,6 +93,29 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__small_file) BOOST_CHECK(c1.typeid_state() == typeid(ContextUncompressed)); BOOST_CHECK(c1.typeid_state() != typeid(ContextZstdSeekable)); } + + // Context equivalent - compressed zstd + { + printf("checkpoint 1\n"); + + BOOST_CHECK_EQUAL(c2.tell(), 0); + printf("checkpoint 2\n"); + + written = c2.read(buffer, 1024); + BOOST_CHECK_EQUAL(written, 403); + printf("checkpoint 3\n"); + + std_buffer = std::string(buffer, written); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference1), 0, "Difference in content"); + printf("checkpoint 4\n"); + + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + BOOST_CHECK_EQUAL(c2.tell(), 403); + printf("checkpoint 5\n"); + + BOOST_CHECK(c2.typeid_state() != typeid(ContextUncompressed)); + BOOST_CHECK(c2.typeid_state() == typeid(ContextZstdSeekable)); + } // test what happens when file is closed (twice) From 155f2a2f0e3dcfda52713f0324c954a70088c9fb Mon Sep 17 00:00:00 2001 From: yhoogstrate Date: Thu, 14 Apr 2022 21:22:14 +0200 Subject: [PATCH 46/65] tidy --- include/database.hpp | 2 +- include/fastafs.hpp | 7 +- include/fourbit_byte.hpp | 2 +- include/sequence_region.hpp | 30 +++++-- include/twobit_byte.hpp | 2 +- include/xbit_byte_encoder.hpp | 28 +++--- src/database.cpp | 19 ++-- src/fastafs.cpp | 32 +++---- src/flags.cpp | 2 +- src/fuse.cpp | 12 +-- src/ucsc2bit.cpp | 2 +- src/utils.cpp | 18 ++-- test/database/test_database.cpp | 4 +- test/fastafs/test_fastafs.cpp | 86 +++++++++---------- .../test_fivebit_fivebytes.cpp | 2 +- test/sequenceregion/test_sequenceregion.cpp | 10 +-- test/view/test_view.cpp | 24 +++--- 17 files changed, 146 insertions(+), 136 deletions(-) diff --git a/include/database.hpp b/include/database.hpp index 06f16734..5094f10e 100644 --- 
a/include/database.hpp +++ b/include/database.hpp @@ -16,7 +16,7 @@ class database public: database(const std::string &); - + const static std::string get_default_dir(); void force_db_exists(); diff --git a/include/fastafs.hpp b/include/fastafs.hpp index 92b00b51..609e5240 100644 --- a/include/fastafs.hpp +++ b/include/fastafs.hpp @@ -28,7 +28,7 @@ struct ffs2f_init_seq { std::vector m_starts;// file position based std::vector m_ends;// file position based - + const uint32_t filesize;// with padding and newlines [fastafs_seq->fasta_filesize(cache->padding_arg)] ffs2f_init_seq(const uint32_t padding, size_t n_blocks, size_t m_blocks, const uint32_t n_lines, const uint32_t filesize): @@ -46,7 +46,8 @@ struct ffs2f_init { ffs2f_init(size_t size, uint32_t padding_arg): padding_arg(padding_arg), sequences(size) {} - ~ffs2f_init(void) { + ~ffs2f_init(void) + { for(size_t i = 0; i < sequences.size(); i++) { delete sequences[i]; } @@ -113,7 +114,7 @@ class fastafs std::string name; std::string filename; compression_type filetype; - + std::vector data; uint32_t crc32f;// crc32 as found in fastafs file diff --git a/include/fourbit_byte.hpp b/include/fourbit_byte.hpp index 6cbb6bac..31e87e31 100644 --- a/include/fourbit_byte.hpp +++ b/include/fourbit_byte.hpp @@ -15,7 +15,7 @@ class fourbit_byte : public xbit_byte_encoder static const char xbit_byte_encoder::n_fill_unmasked = '-'; static const char xbit_byte_encoder::n_fill_masked = '-'; static const unsigned char xbit_byte_encoder::bits_per_nucleotide = 4; - + //static const char fourbit_alphabet[17]; static char encode_hash[256][3]; diff --git a/include/sequence_region.hpp b/include/sequence_region.hpp index e59c1c58..f3854c26 100644 --- a/include/sequence_region.hpp +++ b/include/sequence_region.hpp @@ -31,19 +31,31 @@ class sequence_region std::string seq_name; - + void parse(const char *); - + public: sequence_region(char *); - sequence_region(const char * seqstr ); - - - std::string get_seq_name() {return seq_name; }; - 
off_t get_start_position(void) const {return start; }; - off_t get_end_position(void) const {return end; }; - bool has_defined_end(void) const {return defined_end; }; + sequence_region(const char * seqstr); + + + std::string get_seq_name() + { + return seq_name; + }; + off_t get_start_position(void) const + { + return start; + }; + off_t get_end_position(void) const + { + return end; + }; + bool has_defined_end(void) const + { + return defined_end; + }; }; diff --git a/include/twobit_byte.hpp b/include/twobit_byte.hpp index 6e37a73d..e470216d 100644 --- a/include/twobit_byte.hpp +++ b/include/twobit_byte.hpp @@ -21,7 +21,7 @@ class twobit_byte : public xbit_byte_encoder char (&encode_hash)[256][5]; twobit_byte(char (&encode_hash_arg)[256][5]): encode_hash(encode_hash_arg) {}; - + static const char nucleotides_per_byte = 8 / twobit_byte::bits_per_nucleotide ; // this is about compressed data static const char nucleotides_per_chunk = 8 / twobit_byte::bits_per_nucleotide ; // this is about decompressed chunks diff --git a/include/xbit_byte_encoder.hpp b/include/xbit_byte_encoder.hpp index 9ab30c65..423fe24d 100644 --- a/include/xbit_byte_encoder.hpp +++ b/include/xbit_byte_encoder.hpp @@ -12,33 +12,33 @@ class xbit_byte_encoder { private: - + public: // these members need to be overwritten by parental classes static const char n_fill_unmasked; static const char n_fill_masked; - + static const unsigned char bits_per_nucleotide; unsigned char data; // go private xbit_byte_encoder() {}; -/* - char (&encode_hash)[256][5]; - twobit_byte(char (&encode_hash_arg)[256][5]): encode_hash(encode_hash_arg) {}; + /* + char (&encode_hash)[256][5]; + twobit_byte(char (&encode_hash_arg)[256][5]): encode_hash(encode_hash_arg) {}; - unsigned char data; // go private - void set(unsigned char, unsigned char); - void set(char*);// string met 4 bytes set - char *get(void); - char *get(unsigned char); + unsigned char data; // go private + void set(unsigned char, unsigned char); + void 
set(char*);// string met 4 bytes set + char *get(void); + char *get(unsigned char); - static unsigned char iterator_to_offset(unsigned int); + static unsigned char iterator_to_offset(unsigned int); - static const off_t nucleotides_to_compressed_fileoffset(size_t); // file offset waarna gelezen kan worden - static const off_t nucleotides_to_compressed_offset(size_t);// aantal bytes nodig om zoveel data weg te schrijven - */ + static const off_t nucleotides_to_compressed_fileoffset(size_t); // file offset waarna gelezen kan worden + static const off_t nucleotides_to_compressed_offset(size_t);// aantal bytes nodig om zoveel data weg te schrijven + */ void next(chunked_reader &); // update the compressed data }; diff --git a/src/database.cpp b/src/database.cpp index f9afe153..3f0934df 100644 --- a/src/database.cpp +++ b/src/database.cpp @@ -14,17 +14,15 @@ #include "lsfastafs.hpp" - + const std::string database::get_default_dir() { const char* home_c = getenv("HOME"); - if(home_c == nullptr) - { + if(home_c == nullptr) { struct passwd *pw = getpwuid(getuid()); home_c = pw->pw_dir; - if(home_c == nullptr) - { + if(home_c == nullptr) { throw std::runtime_error("Could not deterimine home dir. Also, no $HOME environment variable is set."); } } @@ -78,11 +76,11 @@ void database::list() std::ifstream infile(this->idx); std::string line; std::string version; - + while(std::getline(infile, line)) { std::string fname = this->path + "/" + line + ".fastafs"; bool zstd_seek = false; - + if(!file_exist(fname.c_str())) { fname = this->path + "/" + line + ".fastafs.zst"; zstd_seek = true; @@ -145,11 +143,10 @@ void database::list() // @todo return a filestream to a particular file one day? 
std::string database::add(char *name) { - if(this->get(name) != "") - { + if(this->get(name) != "") { throw std::runtime_error("Trying to add duplicate entry to database."); } - + std::ofstream outputFile; outputFile.open(this->idx, std::fstream::app); @@ -173,7 +170,7 @@ std::string database::get(char *fastafs_name_or_id) while(std::getline(infile, line, '\n')) { if(line.compare(fastafs_name_or_id) == 0) { fname = this->path + "/" + line + ".fastafs"; - + if(!file_exist(fname.c_str())) { fname = this->path + "/" + line + ".fastafs.zst"; } diff --git a/src/fastafs.cpp b/src/fastafs.cpp index debb89ee..e52b8b8f 100644 --- a/src/fastafs.cpp +++ b/src/fastafs.cpp @@ -208,8 +208,8 @@ template inline uint32_t fastafs_seq::view_fasta_chunk_generalized( } uint32_t pos = (uint32_t) start_pos_in_fasta; - - + + size_t pos_limit = this->name.size() + 2; if(pos < pos_limit) { const std::string header = ">" + this->name + "\n"; @@ -218,15 +218,15 @@ template inline uint32_t fastafs_seq::view_fasta_chunk_generalized( const uint32_t copied = (uint32_t) header.copy(buffer, tocopy, pos); // effective size of copied data written += (uint32_t) copied; - + if(written >= buffer_size) { return written; } - + pos += (uint32_t) copied; } - const uint32_t offset_from_sequence_line = (uint32_t) (pos - pos_limit); + const uint32_t offset_from_sequence_line = (uint32_t)(pos - pos_limit); size_t n_block = cache->n_starts.size(); size_t m_block = cache->m_starts.size(); uint32_t newlines_passed = offset_from_sequence_line / (cache->padding + 1);// number of newlines passed (within the sequence part) @@ -303,13 +303,13 @@ template inline uint32_t fastafs_seq::view_fasta_chunk_generalized( } if(pos == cur_n_end) { - //if(pos == cache->n_ends[n_block]) { + //if(pos == cache->n_ends[n_block]) { n_block++; cur_n_end = cache->n_ends[n_block]; cur_n_start = cache->n_starts[n_block]; } if(pos == cur_m_end) { - //if(pos == cache->m_ends[m_block]) { + //if(pos == cache->m_ends[m_block]) { m_block++; 
cur_m_end = cache->m_ends[m_block]; cur_m_start = cache->m_starts[m_block]; @@ -645,7 +645,7 @@ void fastafs::load(std::string afilename) chunked_reader fh_in = chunked_reader(afilename.c_str()); { this->filetype = fh_in.get_filetype(); - + memblock = new char [20 + 1]; //sha1 is 20b // if a user can't compile this line, please replace it with C's // 'realpath' function and delete/free afterwards and send a PR @@ -1483,9 +1483,9 @@ uint32_t fastafs::view_faidx_chunk(uint32_t padding, char *buffer, size_t buffer { std::string contents = this->get_faidx(padding); - size_t to_copy = std::min(buffer_size, contents.size() - file_offset ); + size_t to_copy = std::min(buffer_size, contents.size() - file_offset); - return (uint32_t) contents.copy(buffer, to_copy, file_offset ); + return (uint32_t) contents.copy(buffer, to_copy, file_offset); } @@ -1545,14 +1545,14 @@ int fastafs::info(bool ena_verify_checksum) std::cout << "# FASTAFS NAME: " << this->filename << "\n"; std::cout << "# FORMAT: v0-x32"; switch(this->filetype) { - case compression_type::undefined: - printf("?\n"); + case compression_type::undefined: + printf("?\n"); break; - case compression_type::uncompressed : - printf("\n"); + case compression_type::uncompressed : + printf("\n"); break; - case compression_type::zstd: - printf("+Z\n"); + case compression_type::zstd: + printf("+Z\n"); break; } printf("# SEQUENCES: %u\n", (uint32_t) this->data.size()); diff --git a/src/flags.cpp b/src/flags.cpp index 900d915f..7a468c77 100644 --- a/src/flags.cpp +++ b/src/flags.cpp @@ -40,7 +40,7 @@ bool twobit_flag::get_flag(unsigned char bit) /** * @param bit denotes the i'th of 16 bits to set value of * @param enable whether to enable of disable the bit - * + * * more info: https://www.learncpp.com/cpp-tutorial/bit-manipulation-with-bitwise-operators-and-bit-masks/ */ void twobit_flag::set_flag(unsigned char bit, bool enable) diff --git a/src/fuse.cpp b/src/fuse.cpp index a9dd99e9..1b9c11a7 100644 --- a/src/fuse.cpp +++ 
b/src/fuse.cpp @@ -116,7 +116,7 @@ static int do_getattr(const char *path, struct stat *st) st->st_nlink = 1; //@todo this needs to be defined with some api stuff:!! - st->st_size = (signed int) ffi->f->view_sequence_region_size( (strchr(path, '/') + 5)); + st->st_size = (signed int) ffi->f->view_sequence_region_size((strchr(path, '/') + 5)); } else { st->st_mode = S_IFREG | 0444; st->st_nlink = 1; @@ -248,13 +248,13 @@ static int do_open(const char *path, struct fuse_file_info *fi) ); //printf("sem init... \n"); - sem_init( &(ft->crs[ft->thread_i].sem), 0, 1 ); + sem_init(&(ft->crs[ft->thread_i].sem), 0, 1); //printf("sem init done... \n"); } ft->thread_i = 0; fi->fh = reinterpret_cast(ft); - + #if DEBUG printf("\033[0;35m fi->fh: %u\n", (unsigned int) fi->fh); printf("\033[0;35m fi->writepage: %u\n", fi->writepage); @@ -287,7 +287,7 @@ static int do_release(const char *path, struct fuse_file_info *fi) sem_destroy(&ft->crs[i].sem); delete ft->crs[i].cr; } - + delete ft; } @@ -692,7 +692,7 @@ fuse_instance *parse_args(int argc, char **argv, char **argv_fuse) if(fi->from_fastafs) { std::string fname; std::string name; - + if(from_file_rather_than_from_db) { fname = std::string(argv[mount_target_arg]); //name = std::filesystem::path(fname).filename(); @@ -779,7 +779,7 @@ void fuse(int argc, char *argv[]) fuse_main(ffi->argc_fuse, argv2, &operations, ffi); } //http://www.maastaar.net/fuse/linux/filesystem/c/2016/05/21/writing-a-simple-filesystem-using-fuse/ - + //return ret; } diff --git a/src/ucsc2bit.cpp b/src/ucsc2bit.cpp index 0c298c92..83ef6221 100644 --- a/src/ucsc2bit.cpp +++ b/src/ucsc2bit.cpp @@ -271,7 +271,7 @@ void ucsc2bit::load(std::string afilename) // check version for(i = 0 ; i < 4; i++) { - if(memblock[i+4] != UCSC2BIT_VERSION[i]) { + if(memblock[i + 4] != UCSC2BIT_VERSION[i]) { delete[] memblock; throw std::invalid_argument("Corrupt 2bit file. 
unknown version: " + filename); } diff --git a/src/utils.cpp b/src/utils.cpp index 390a9841..3ecb5c85 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -207,15 +207,15 @@ bool is_ucsc2bit_file(char *filename) if(fread(buf, 1, 4, fp) == 4) { fclose(fp); - - + + return UCSC2BIT_MAGIC.compare(0, 4, buf) == 0; //return ( - //buf[0] == UCSC2BIT_MAGIC[0] and - //buf[1] == UCSC2BIT_MAGIC[1] and - //buf[2] == UCSC2BIT_MAGIC[2] and - //buf[3] == UCSC2BIT_MAGIC[3] - //);// return true if first byte equals > + //buf[0] == UCSC2BIT_MAGIC[0] and + //buf[1] == UCSC2BIT_MAGIC[1] and + //buf[2] == UCSC2BIT_MAGIC[2] and + //buf[3] == UCSC2BIT_MAGIC[3] + //);// return true if first byte equals > } else { fclose(fp); @@ -330,8 +330,8 @@ bool file_exist(const char *fileName) //moe classical but slower implementation //std::ifstream infile(fileName); //return infile.good(); - + //following implementation should be faster struct stat buffer; - return (stat (fileName, &buffer) == 0); + return (stat(fileName, &buffer) == 0); } diff --git a/test/database/test_database.cpp b/test/database/test_database.cpp index fe11086a..c3dc2918 100644 --- a/test/database/test_database.cpp +++ b/test/database/test_database.cpp @@ -19,10 +19,10 @@ BOOST_AUTO_TEST_CASE(test_database__01) const std::string default_dir_1 = database::get_default_dir(); unsetenv("HOME"); const std::string default_dir_2 = database::get_default_dir(); - + BOOST_REQUIRE(default_dir_1.size() > 0); BOOST_REQUIRE(default_dir_2.size() > 0); - + BOOST_CHECK_EQUAL(default_dir_1, default_dir_2); // printf("[%s]==[%s]\n", default_dir_1.c_str(), default_dir_2.c_str()); } diff --git a/test/fastafs/test_fastafs.cpp b/test/fastafs/test_fastafs.cpp index 3dc187d2..3db8ed32 100644 --- a/test/fastafs/test_fastafs.cpp +++ b/test/fastafs/test_fastafs.cpp @@ -930,64 +930,64 @@ BOOST_AUTO_TEST_CASE(test_fastafs__failing_example) { // s=4096, off=20480 -/* + /* - // is auto-generated by python script - fastafs fs = fastafs("test"); - 
fs.load("tmp/benchmark/test.zst"); + // is auto-generated by python script + fastafs fs = fastafs("test"); + fs.load("tmp/benchmark/test.zst"); - BOOST_REQUIRE(fs.data.size() > 0); + BOOST_REQUIRE(fs.data.size() > 0); - ffs2f_init* cache_p40 = fs.init_ffs2f(40, true); // equals original fasta + ffs2f_init* cache_p40 = fs.init_ffs2f(40, true); // equals original fasta - const int READ_BUFFER_SIZE_F = 4096 ; // make sure it is large enough, error occurrsed with buf len=4096 - char* buffer = new char[READ_BUFFER_SIZE_F + 2]; - uint32_t ret; + const int READ_BUFFER_SIZE_F = 4096 ; // make sure it is large enough, error occurrsed with buf len=4096 + char* buffer = new char[READ_BUFFER_SIZE_F + 2]; + uint32_t ret; - // test the first read - chunked_reader fh1 = chunked_reader(fs.filename.c_str()); - flush_buffer(buffer, READ_BUFFER_SIZE_F + 1, '\0'); - ret = fs.view_fasta_chunk(cache_p40, buffer, 4096, 0, fh1); - //printf("[%i]\n", ret); - buffer[4096] = '\0'; - //printf("[%s]\n", buffer); - //printf("----------------------------------------------------------------\n", buffer); + // test the first read + chunked_reader fh1 = chunked_reader(fs.filename.c_str()); + flush_buffer(buffer, READ_BUFFER_SIZE_F + 1, '\0'); + ret = fs.view_fasta_chunk(cache_p40, buffer, 4096, 0, fh1); + //printf("[%i]\n", ret); + buffer[4096] = '\0'; + //printf("[%s]\n", buffer); + //printf("----------------------------------------------------------------\n", buffer); - // test the first read - flush_buffer(buffer, READ_BUFFER_SIZE_F + 1, '\0'); - ret = fs.view_fasta_chunk(cache_p40, buffer, 4096, 0); - //printf("[%i]\n", ret); - buffer[4096] = '\0'; - //printf("[%s]\n", buffer); - //printf("----------------------------------------------------------------\n", buffer); + // test the first read + flush_buffer(buffer, READ_BUFFER_SIZE_F + 1, '\0'); + ret = fs.view_fasta_chunk(cache_p40, buffer, 4096, 0); + //printf("[%i]\n", ret); + buffer[4096] = '\0'; + //printf("[%s]\n", buffer); + 
//printf("----------------------------------------------------------------\n", buffer); - // test the first read - //chunked_reader fh2 = chunked_reader(fs.filename.c_str()); - flush_buffer(buffer, READ_BUFFER_SIZE_F + 1, '\0'); - ret = fs.view_fasta_chunk(cache_p40, buffer, 4096, 20480, fh1); - //printf("[%i]\n", ret); - buffer[4096] = '\0'; - //printf("[%s]\n", buffer); - //printf("----------------------------------------------------------------\n", buffer); + // test the first read + //chunked_reader fh2 = chunked_reader(fs.filename.c_str()); + flush_buffer(buffer, READ_BUFFER_SIZE_F + 1, '\0'); + ret = fs.view_fasta_chunk(cache_p40, buffer, 4096, 20480, fh1); + //printf("[%i]\n", ret); + buffer[4096] = '\0'; + //printf("[%s]\n", buffer); + //printf("----------------------------------------------------------------\n", buffer); - // test the first read - flush_buffer(buffer, READ_BUFFER_SIZE_F + 1, '\0'); - ret = fs.view_fasta_chunk(cache_p40, buffer, 4096, 20480); - //printf("[%i]\n", ret); - buffer[4096] = '\0'; - //printf("[%s]\n", buffer); - //printf("----------------------------------------------------------------\n", buffer); + // test the first read + flush_buffer(buffer, READ_BUFFER_SIZE_F + 1, '\0'); + ret = fs.view_fasta_chunk(cache_p40, buffer, 4096, 20480); + //printf("[%i]\n", ret); + buffer[4096] = '\0'; + //printf("[%s]\n", buffer); + //printf("----------------------------------------------------------------\n", buffer); - delete cache_p40; - delete[] buffer; - * - */ + delete cache_p40; + delete[] buffer; + * + */ } diff --git a/test/fivebit_fivebytes/test_fivebit_fivebytes.cpp b/test/fivebit_fivebytes/test_fivebit_fivebytes.cpp index 6a9605d4..060b2366 100644 --- a/test/fivebit_fivebytes/test_fivebit_fivebytes.cpp +++ b/test/fivebit_fivebytes/test_fivebit_fivebytes.cpp @@ -141,7 +141,7 @@ BOOST_AUTO_TEST_CASE(test_fivebit_fivebytes__dict_conversions) for(size_t i = 0; i < dict.size(); i++) { // set and compress amino acid string for(size_t j = 0 
; j < 8; j ++) { - f.set((unsigned char) j, (unsigned char) hash[ (unsigned char) dict[i][j] ]) ; + f.set((unsigned char) j, (unsigned char) hash[(unsigned char) dict[i][j] ]) ; } // decompress diff --git a/test/sequenceregion/test_sequenceregion.cpp b/test/sequenceregion/test_sequenceregion.cpp index 9ef05577..acf7033a 100644 --- a/test/sequenceregion/test_sequenceregion.cpp +++ b/test/sequenceregion/test_sequenceregion.cpp @@ -52,7 +52,7 @@ BOOST_AUTO_TEST_CASE(test_sequence_region) BOOST_CHECK_EQUAL(sr.get_start_position(), 123); BOOST_CHECK_EQUAL(sr.has_defined_end(), true); - BOOST_CHECK_EQUAL(sr.get_end_position() , 123); + BOOST_CHECK_EQUAL(sr.get_end_position(), 123); } { @@ -63,7 +63,7 @@ BOOST_AUTO_TEST_CASE(test_sequence_region) BOOST_CHECK_EQUAL(sr.get_start_position(), 0); BOOST_CHECK_EQUAL(sr.has_defined_end(), true); - BOOST_CHECK_EQUAL(sr.get_end_position() , 123); + BOOST_CHECK_EQUAL(sr.get_end_position(), 123); } { @@ -128,7 +128,7 @@ BOOST_AUTO_TEST_CASE(test_sequence_region3) BOOST_CHECK_EQUAL(sr.get_start_position(), 1234); BOOST_CHECK_EQUAL(sr.has_defined_end(), true); - BOOST_CHECK_EQUAL(sr.get_end_position() , 1234); + BOOST_CHECK_EQUAL(sr.get_end_position(), 1234); } { @@ -139,7 +139,7 @@ BOOST_AUTO_TEST_CASE(test_sequence_region3) BOOST_CHECK_EQUAL(sr.get_start_position(), 0); BOOST_CHECK_EQUAL(sr.has_defined_end(), true); - BOOST_CHECK_EQUAL(sr.get_end_position() , 1234); + BOOST_CHECK_EQUAL(sr.get_end_position(), 1234); } { @@ -150,7 +150,7 @@ BOOST_AUTO_TEST_CASE(test_sequence_region3) BOOST_CHECK_EQUAL(sr.get_start_position(), 1234); BOOST_CHECK_EQUAL(sr.has_defined_end(), true); - BOOST_CHECK_EQUAL(sr.get_end_position() , 1235); + BOOST_CHECK_EQUAL(sr.get_end_position(), 1235); } diff --git a/test/view/test_view.cpp b/test/view/test_view.cpp index f2f9479b..e883eb1d 100644 --- a/test/view/test_view.cpp +++ b/test/view/test_view.cpp @@ -28,7 +28,7 @@ static int test_i = 0; BOOST_AUTO_TEST_CASE(test_fastafs_seq_static_func) { - 
printf("test %i\n",++test_i); + printf("test %i\n", ++test_i); /* padding=4, offset=0, position_until=0, 1, 2, 3: 0 "A" "AC" "ACT" "ACTG" padding=4, offset=0, position_until=4, 5, 6, 7, 8: 1 "ACTG\n" "ACTG\nA" "ACTG\nAA" "ACTG\nAAA" "ACTG\nAAAA" @@ -135,7 +135,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs_seq_static_func) BOOST_AUTO_TEST_CASE(test_fastafs_twobit_offset_calc) { - printf("test %i\n",++test_i); + printf("test %i\n", ++test_i); // testing "ACTGACTGNNNNACTG" uint32_t num_Ns; // number of N's until certain nucleotide is reached @@ -184,7 +184,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs_twobit_offset_calc) */ BOOST_AUTO_TEST_CASE(test_chunked_viewing) { - printf("test %i\n",++test_i); + printf("test %i\n", ++test_i); uint32_t written; @@ -360,7 +360,7 @@ BOOST_AUTO_TEST_CASE(test_chunked_viewing) BOOST_AUTO_TEST_CASE(test_chunked_viewing_sub) { - printf("test %i\n",++test_i); + printf("test %i\n", ++test_i); uint32_t written; std::string test_name = "test"; @@ -405,7 +405,7 @@ BOOST_AUTO_TEST_CASE(test_chunked_viewing_sub) BOOST_AUTO_TEST_CASE(test_chunked_viewing_fourbit) { - printf("test %i\n",++test_i); + printf("test %i\n", ++test_i); std::string test_name = "test_004"; std::string fasta_file = "test/data/" + test_name + ".fa"; @@ -547,7 +547,7 @@ BOOST_AUTO_TEST_CASE(test_chunked_viewing_fourbit) // it can return less bytes than the buffer_size BOOST_AUTO_TEST_CASE(test_chunked_viewing_buffermaxlen) { - printf("test %i\n",++test_i); + printf("test %i\n", ++test_i); BOOST_REQUIRE_EQUAL(READ_BUFFER_SIZE, 4096);// required for this test @@ -581,7 +581,7 @@ BOOST_AUTO_TEST_CASE(test_chunked_viewing_buffermaxlen) // it can return less bytes than the buffer_size BOOST_AUTO_TEST_CASE(test_chunked_viewing_buffermaxlen_lim) { - printf("test %i\n",++test_i); + printf("test %i\n", ++test_i); BOOST_REQUIRE_EQUAL(READ_BUFFER_SIZE, 4096);// required for this test @@ -616,7 +616,7 @@ BOOST_AUTO_TEST_CASE(test_chunked_viewing_buffermaxlen_lim) // it can return less bytes than 
the buffer_size BOOST_AUTO_TEST_CASE(test_chunked_viewing_buffermaxlen2) { - printf("test %i\n",++test_i); + printf("test %i\n", ++test_i); BOOST_REQUIRE_EQUAL(READ_BUFFER_SIZE, 4096);// required for this test @@ -656,7 +656,7 @@ BOOST_AUTO_TEST_CASE(test_chunked_viewing_buffermaxlen2) BOOST_AUTO_TEST_CASE(test_chunked_viewing_zstd) { - printf("test %i\n",++test_i); + printf("test %i\n", ++test_i); std::string test_name = "test"; @@ -845,7 +845,7 @@ BOOST_AUTO_TEST_CASE(test_chunked_viewing_zstd) BOOST_AUTO_TEST_CASE(test_chunked_viewing2) { - printf("test %i\n",++test_i); + printf("test %i\n", ++test_i); std::string test_name = "test_003"; std::string fasta_file = "test/data/" + test_name + ".fa"; @@ -899,8 +899,8 @@ BOOST_AUTO_TEST_CASE(test_chunked_viewing2) size_t n = full_file.size(); uint32_t start_pos = 0; for(float i = 0.0; i <= 12.0; i += 1) { // perform limited subset of tests - start_pos = (uint32_t) ((i/12.0) * (double) n); - printf(" - %uli / %zu\n",start_pos, n); + start_pos = (uint32_t)((i / 12.0) * (double) n); + printf(" - %uli / %zu\n", start_pos, n); for(uint32_t buffer_len = (uint32_t) full_file.size() - start_pos; buffer_len > 0; buffer_len--) { std::string substr_file = std::string(full_file, start_pos, buffer_len); From 135bc87faf674e3b07ab81441cc995d05c550809 Mon Sep 17 00:00:00 2001 From: yhoogstrate Date: Thu, 14 Apr 2022 21:33:37 +0200 Subject: [PATCH 47/65] another test passed --- test/chunked_reader/test_chunked_reader.cpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/test/chunked_reader/test_chunked_reader.cpp b/test/chunked_reader/test_chunked_reader.cpp index 6535308b..44787da8 100644 --- a/test/chunked_reader/test_chunked_reader.cpp +++ b/test/chunked_reader/test_chunked_reader.cpp @@ -141,6 +141,19 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__small_file) flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); BOOST_CHECK_EQUAL(c1.tell(), 403); } + + // Context equivalent - compressed zstd + { + written = 
c2.read(buffer, 1024); + BOOST_CHECK_EQUAL(written, 0); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + BOOST_CHECK_EQUAL(c2.tell(), 403); + + written = c2.read(buffer, 1024); + BOOST_CHECK_EQUAL(written, 0); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + BOOST_CHECK_EQUAL(c2.tell(), 403); + } // test seek stuff From 03f84a7ae06e0a2f03160e439348937b713bf11f Mon Sep 17 00:00:00 2001 From: yhoogstrate Date: Fri, 15 Apr 2022 19:52:09 +0200 Subject: [PATCH 48/65] debug me --- src/chunked_reader.cpp | 6 +-- test/chunked_reader/test_chunked_reader.cpp | 44 +++++++++++++++++++++ 2 files changed, 47 insertions(+), 3 deletions(-) diff --git a/src/chunked_reader.cpp b/src/chunked_reader.cpp index 7337a6aa..49a1471f 100644 --- a/src/chunked_reader.cpp +++ b/src/chunked_reader.cpp @@ -534,13 +534,13 @@ void ContextZstdSeekable::fopen(off_t start_pos) this->fh = ZSTD_seekable_decompressFile_init(this->context->get_filename().c_str()); - if((fh->fin == NULL) | feof(fh->fin)) + if((this->fh->fin == NULL) | feof(this->fh->fin)) { throw std::runtime_error("[ContextZstdSeekable::fopen] not implemented.\n"); } else { - fseek_orDie(fh->fin, 0, SEEK_SET);// set initial file handle to 0? + fseek_orDie(this->fh->fin, 0, SEEK_SET);// set initial file handle to 0? 
// this->fh->seekg(start_pos, std::ios::beg); size_t const initResult = ZSTD_seekable_initFile(this->seekable, fh->fin); @@ -553,7 +553,7 @@ void ContextZstdSeekable::fopen(off_t start_pos) void ContextZstdSeekable::seek(off_t arg_offset) { - throw std::runtime_error("[ContextZstdSeekable::seek] not implemented.\n"); + fseek_orDie(fh->fin, arg_offset, SEEK_SET); } ContextZstdSeekable::~ContextZstdSeekable() diff --git a/test/chunked_reader/test_chunked_reader.cpp b/test/chunked_reader/test_chunked_reader.cpp index 44787da8..78bcf6d9 100644 --- a/test/chunked_reader/test_chunked_reader.cpp +++ b/test/chunked_reader/test_chunked_reader.cpp @@ -176,6 +176,17 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__small_file) BOOST_CHECK_EQUAL(c1.tell(), 402); } + // Context equivalent - compressed zstd + { + BOOST_CHECK_EQUAL(c2.tell(), 403); + c2.seek(0); + BOOST_CHECK_EQUAL(c2.tell(), 0); + c2.seek(1); + BOOST_CHECK_EQUAL(c2.tell(), 1); + c2.seek(402); + BOOST_CHECK_EQUAL(c2.tell(), 402); + } + r_flat.seek(0); written = r_flat.read(buffer, 4); @@ -200,6 +211,20 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__small_file) } + // Context equivalent - compressed zstd + { + c2.seek(0); + BOOST_CHECK_EQUAL(c2.tell(), 0); + written = c2.read(buffer, 4); + BOOST_CHECK_EQUAL(written, 4); + + BOOST_CHECK_EQUAL(c2.tell(), 4); + std_buffer = std::string(buffer, written); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference2), 0, "Difference in content"); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } + + r_flat.seek(1); // reset to first pos in file BOOST_CHECK_EQUAL(r_flat.tell(), 1); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); @@ -223,13 +248,32 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__small_file) flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); } + // Context equivalent - compressed zstd + { + c2.seek(1); + BOOST_CHECK_EQUAL(c2.tell(), 1); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + written = c2.read(buffer, 4); + BOOST_CHECK_EQUAL(written, 4); + 
BOOST_CHECK_EQUAL(c2.tell(), 5); + std_buffer = std::string(buffer, written); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference3), 0, "Difference in content"); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } + + //@todo should trigger error!? r_flat.seek(1024*1024); // trigger out of bound // Context equivalent - uncompressed { c1.seek(1024*1024); } + + // Context equivalent - compressed zstd + { + c2.seek(1024*1024); + } } { From f1bd0c9d98cecd256b968a90d3072ecc7405fe84 Mon Sep 17 00:00:00 2001 From: youri Date: Tue, 21 Jun 2022 09:30:53 +0200 Subject: [PATCH 49/65] resolved an issue --- include/chunked_reader.hpp | 1 + src/chunked_reader.cpp | 19 +++++++-- test/chunked_reader/test_chunked_reader.cpp | 47 ++++++++++++++++++++- 3 files changed, 62 insertions(+), 5 deletions(-) diff --git a/include/chunked_reader.hpp b/include/chunked_reader.hpp index 5ca7d13f..500ed065 100644 --- a/include/chunked_reader.hpp +++ b/include/chunked_reader.hpp @@ -159,6 +159,7 @@ class Context // master chunked_reader size_t read(char *, size_t); void seek(off_t); size_t tell(); + size_t get_file_i(); }; diff --git a/src/chunked_reader.cpp b/src/chunked_reader.cpp index 49a1471f..17eff974 100644 --- a/src/chunked_reader.cpp +++ b/src/chunked_reader.cpp @@ -20,8 +20,6 @@ chunked_reader::chunked_reader(const char * afilename) : chunked_reader::~chunked_reader() { - //printf("[chunked_reader::~chunked_reader] exterminate, destroy(!)\n"); - if(this->fh_flat != nullptr) { if(this->fh_flat->is_open()) { this->fh_flat->close(); @@ -376,9 +374,20 @@ void Context::seek(off_t arg_offset) // positio in the (decompressed) file size_t Context::tell() { + printf("Context :: tell: %i - %i + %i = %i\n", + this->file_i , + this->buffer_n , + this->buffer_i , + this->file_i - this->buffer_n + this->buffer_i); + return this->file_i - this->buffer_n + this->buffer_i; } +size_t Context::get_file_i() +{ + return this->file_i; +} + const std::type_info& Context::typeid_state() { return 
typeid(*this->state); // somehow pointer is needed to return ContextSubvariant rather than State @@ -484,11 +493,12 @@ size_t ContextZstdSeekable::cache_buffer() // figure out the location in the decompressed file - printf("%i\n",this->context->tell()); + printf("[%i] << tell \n",this->context->tell()); + printf("[%i] << file_i \n",this->context->get_file_i()); size_t written = ZSTD_seekable_decompressFile_orDie( this->fh, - this->context->tell(), //this->context->file_i, + this->context->get_file_i(), //this->context->file_i, this->context->get_buffer(), this->context->tell() + READ_BUFFER_SIZE //this->context->file_i + READ_BUFFER_SIZE ); @@ -553,6 +563,7 @@ void ContextZstdSeekable::fopen(off_t start_pos) void ContextZstdSeekable::seek(off_t arg_offset) { + printf("fseekordie: %i\n", arg_offset); fseek_orDie(fh->fin, arg_offset, SEEK_SET); } diff --git a/test/chunked_reader/test_chunked_reader.cpp b/test/chunked_reader/test_chunked_reader.cpp index 78bcf6d9..ecb1a415 100644 --- a/test/chunked_reader/test_chunked_reader.cpp +++ b/test/chunked_reader/test_chunked_reader.cpp @@ -196,7 +196,6 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__small_file) BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference2), 0, "Difference in content"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - // Context equivalent - uncompressed { c1.seek(0); @@ -250,14 +249,30 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__small_file) // Context equivalent - compressed zstd { + printf("---------------------\n"); + c2.seek(1); + printf("?????????????????????\n"); BOOST_CHECK_EQUAL(c2.tell(), 1); + + printf(":::::::::::::::::::::\n"); + + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); written = c2.read(buffer, 4); BOOST_CHECK_EQUAL(written, 4); BOOST_CHECK_EQUAL(c2.tell(), 5); std_buffer = std::string(buffer, written); + BOOST_CHECK_EQUAL(std_buffer.size(), reference3.size()); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference3), 0, "Difference in content"); + 
if(std_buffer.compare(reference3) != 0) { + printf("%i != %i\n",reference3.size(), std_buffer.size()); + printf("%s != %s\n",reference3, std_buffer); + + printf("[%u][%u][%u][%u]\n",(unsigned char) reference3[0],reference3[1],reference3[2],reference3[3]); + printf("[%u][%u][%u][%u]\n",(unsigned char) buffer[0],buffer[1],buffer[2],buffer[3]); + //printf("[%c][%c][%c][%c]\n"); + } flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); } @@ -278,12 +293,42 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__small_file) { chunked_reader r_zstd = chunked_reader(fastafs_file_zstd.c_str()); + + // Context equivalent - uncompressed + Context c1(fastafs_file.c_str()); + c1.fopen(0); + BOOST_CHECK(c1.typeid_state() == typeid(ContextUncompressed)); + BOOST_CHECK(c1.typeid_state() != typeid(ContextZstdSeekable)); + + // Context equivalent - compressed + Context c2(fastafs_file_zstd.c_str()); + c2.fopen(0); + BOOST_CHECK(c2.typeid_state() == typeid(ContextZstdSeekable)); + BOOST_CHECK(c2.typeid_state() != typeid(ContextUncompressed)); + + written = r_zstd.read(buffer, 1024); BOOST_CHECK_EQUAL(written, 403); std_buffer = std::string(buffer, written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference1), 0, "Difference in content"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + + //{ + //written = c1.read(buffer, 1024); + //BOOST_CHECK_EQUAL(written, 403); + //std_buffer = std::string(buffer, written); + //BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference1), 0, "Difference in content"); + //flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + //} + + //{ + //written = c2.read(buffer, 1024); + //BOOST_CHECK_EQUAL(written, 403); + //std_buffer = std::string(buffer, written); + //BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference1), 0, "Difference in content"); + //flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + //} written = r_zstd.read(buffer, 1024); BOOST_CHECK_EQUAL(written, 0); From 301f2ec069fee637b2d6e974ee7b3a0dc9ced542 Mon Sep 17 00:00:00 2001 From: youri Date: 
Tue, 21 Jun 2022 09:35:34 +0200 Subject: [PATCH 50/65] more tests --- src/chunked_reader.cpp | 1 - test/chunked_reader/test_chunked_reader.cpp | 60 +++++++++++++-------- 2 files changed, 39 insertions(+), 22 deletions(-) diff --git a/src/chunked_reader.cpp b/src/chunked_reader.cpp index 17eff974..ba4b8ec1 100644 --- a/src/chunked_reader.cpp +++ b/src/chunked_reader.cpp @@ -493,7 +493,6 @@ size_t ContextZstdSeekable::cache_buffer() // figure out the location in the decompressed file - printf("[%i] << tell \n",this->context->tell()); printf("[%i] << file_i \n",this->context->get_file_i()); size_t written = ZSTD_seekable_decompressFile_orDie( diff --git a/test/chunked_reader/test_chunked_reader.cpp b/test/chunked_reader/test_chunked_reader.cpp index ecb1a415..79eaebcd 100644 --- a/test/chunked_reader/test_chunked_reader.cpp +++ b/test/chunked_reader/test_chunked_reader.cpp @@ -249,15 +249,9 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__small_file) // Context equivalent - compressed zstd { - printf("---------------------\n"); - c2.seek(1); - printf("?????????????????????\n"); BOOST_CHECK_EQUAL(c2.tell(), 1); - printf(":::::::::::::::::::::\n"); - - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); written = c2.read(buffer, 4); BOOST_CHECK_EQUAL(written, 4); @@ -314,31 +308,55 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__small_file) BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference1), 0, "Difference in content"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - //{ - //written = c1.read(buffer, 1024); - //BOOST_CHECK_EQUAL(written, 403); - //std_buffer = std::string(buffer, written); - //BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference1), 0, "Difference in content"); - //flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - //} - - //{ - //written = c2.read(buffer, 1024); - //BOOST_CHECK_EQUAL(written, 403); - //std_buffer = std::string(buffer, written); - //BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference1), 0, "Difference in content"); - 
//flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - //} + { + written = c1.read(buffer, 1024); + BOOST_CHECK_EQUAL(written, 403); + std_buffer = std::string(buffer, written); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference1), 0, "Difference in content"); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } + + { + written = c2.read(buffer, 1024); + BOOST_CHECK_EQUAL(written, 403); + std_buffer = std::string(buffer, written); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference1), 0, "Difference in content"); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } written = r_zstd.read(buffer, 1024); BOOST_CHECK_EQUAL(written, 0); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + { + written = c1.read(buffer, 1024); + BOOST_CHECK_EQUAL(written, 0); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } + + { + written = c2.read(buffer, 1024); + BOOST_CHECK_EQUAL(written, 0); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } + // test what happens when file is closed written = r_zstd.read(buffer, 1024); BOOST_CHECK_EQUAL(written, 0); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + { + written = c1.read(buffer, 1024); + BOOST_CHECK_EQUAL(written, 0); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } + + { + written = c2.read(buffer, 1024); + BOOST_CHECK_EQUAL(written, 0); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } + // test seek stuff r_zstd.seek(0); // reset to first pos in file BOOST_CHECK_EQUAL(r_zstd.tell(), 0); From 5197a9caacd06839fbd8394a65b7e8fe6a22f520 Mon Sep 17 00:00:00 2001 From: youri Date: Tue, 21 Jun 2022 09:40:45 +0200 Subject: [PATCH 51/65] next to resolve --- test/chunked_reader/test_chunked_reader.cpp | 57 +++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/test/chunked_reader/test_chunked_reader.cpp b/test/chunked_reader/test_chunked_reader.cpp index 79eaebcd..f50336c7 100644 --- a/test/chunked_reader/test_chunked_reader.cpp +++ b/test/chunked_reader/test_chunked_reader.cpp @@ -360,6 
+360,16 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__small_file) // test seek stuff r_zstd.seek(0); // reset to first pos in file BOOST_CHECK_EQUAL(r_zstd.tell(), 0); + + { + r_zstd.seek(0); // reset to first pos in file + BOOST_CHECK_EQUAL(c1.tell(), 0); + } + + { + r_zstd.seek(0); // reset to first pos in file + BOOST_CHECK_EQUAL(c2.tell(), 0); + } written = r_zstd.read(buffer, 4); BOOST_CHECK_EQUAL(written, 4); @@ -368,17 +378,64 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__small_file) BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference2), 0, "Difference in content"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + { + written = c1.read(buffer, 4); + BOOST_CHECK_EQUAL(written, 4); + BOOST_CHECK_EQUAL(r_zstd.tell(), 4); + std_buffer = std::string(buffer, written); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference2), 0, "Difference in content"); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } + + { + written = c2.read(buffer, 4); + BOOST_CHECK_EQUAL(written, 4); + BOOST_CHECK_EQUAL(r_zstd.tell(), 4); + std_buffer = std::string(buffer, written); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference2), 0, "Difference in content"); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } r_zstd.seek(1); // reset to first pos in file BOOST_CHECK_EQUAL(r_zstd.tell(), 1); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + { + c1.seek(1); // reset to first pos in file + BOOST_CHECK_EQUAL(r_zstd.tell(), 1); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } + + { + c2.seek(1); // reset to first pos in file + BOOST_CHECK_EQUAL(r_zstd.tell(), 1); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } + written = r_zstd.read(buffer, 4); BOOST_CHECK_EQUAL(written, 4); BOOST_CHECK_EQUAL(r_zstd.tell(), 5); std_buffer = std::string(buffer, written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference3), 0, "Difference in content"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + + { + written = c1.read(buffer, 4); + 
BOOST_CHECK_EQUAL(written, 4); + BOOST_CHECK_EQUAL(r_zstd.tell(), 5); + std_buffer = std::string(buffer, written); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference3), 0, "Difference in content"); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } + + { + written = c2.read(buffer, 4); + BOOST_CHECK_EQUAL(written, 4); + BOOST_CHECK_EQUAL(r_zstd.tell(), 5); + std_buffer = std::string(buffer, written); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference3), 0, "Difference in content"); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } } } From fe20341cb4906ab0dbe166d8b855dd55163db028 Mon Sep 17 00:00:00 2001 From: yhoogstrate Date: Thu, 14 Jul 2022 16:28:47 +0200 Subject: [PATCH 52/65] resolved copy paste typo in test --- test/chunked_reader/test_chunked_reader.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/chunked_reader/test_chunked_reader.cpp b/test/chunked_reader/test_chunked_reader.cpp index f50336c7..faef0b2e 100644 --- a/test/chunked_reader/test_chunked_reader.cpp +++ b/test/chunked_reader/test_chunked_reader.cpp @@ -362,12 +362,12 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__small_file) BOOST_CHECK_EQUAL(r_zstd.tell(), 0); { - r_zstd.seek(0); // reset to first pos in file + c1.seek(0); // reset to first pos in file BOOST_CHECK_EQUAL(c1.tell(), 0); } { - r_zstd.seek(0); // reset to first pos in file + c2.seek(0); // reset to first pos in file BOOST_CHECK_EQUAL(c2.tell(), 0); } From 142850eacb186367f77bcfaca6fe27a0a9e49e65 Mon Sep 17 00:00:00 2001 From: yhoogstrate Date: Wed, 10 Aug 2022 09:17:29 +0200 Subject: [PATCH 53/65] resolved quite some issues --- include/chunked_reader.hpp | 29 +-- include/flags.hpp | 2 +- include/utils.hpp | 6 +- meson.build | 15 +- src/chunked_reader.cpp | 167 ++++++++----- src/fasta_to_fastafs.cpp | 6 +- src/fastafs.cpp | 39 ++- src/flags.cpp | 2 +- src/ucsc2bit.cpp | 18 +- src/ucsc2bit_to_fastafs.cpp | 32 +-- src/utils.cpp | 6 +- src/xbit_byte_encoder.cpp | 6 +- 
test/cache/test_cache.cpp | 259 ++++++++++---------- test/chunked_reader/test_chunked_reader.cpp | 137 +++++------ test/fastafs/test_fastafs.cpp | 8 +- test/fastafs/test_ucsc2bit.cpp | 11 +- test/flags/test_flags.cpp | 2 +- test/view/test_view.cpp | 6 +- 18 files changed, 429 insertions(+), 322 deletions(-) diff --git a/include/chunked_reader.hpp b/include/chunked_reader.hpp index 500ed065..1c322dc7 100644 --- a/include/chunked_reader.hpp +++ b/include/chunked_reader.hpp @@ -31,7 +31,7 @@ enum compression_type : signed char { // dit is State -class chunked_reader // dit is Context +class chunked_reader_old // dit is Context { private: std::ifstream *fh_flat; @@ -49,9 +49,9 @@ class chunked_reader // dit is Context public: - chunked_reader(char *); // filename - chunked_reader(const char *); // filename - ~chunked_reader(); + chunked_reader_old(char *); // filename + chunked_reader_old(const char *); // filename + ~chunked_reader_old(); void init(); // generic tasks needed for init @@ -76,17 +76,17 @@ class chunked_reader // dit is Context -class Context; +class chunked_reader; class State { protected: - Context *context; // back-reference to context, to access file_i, filename etc. + chunked_reader *context; // back-reference to context, to access file_i, filename etc. 
public: virtual ~State() {}; - void set_context(Context *); - size_t read(char *, size_t, size_t &, size_t &); // reads from buffer, context a-specific + void set_context(chunked_reader *); + size_t read(unsigned char *, size_t, size_t &, size_t &); // reads from buffer, context a-specific // virtual functions: virtual void fopen(off_t) = 0; @@ -129,7 +129,7 @@ class ContextZstdSeekable : public State }; -class Context // master chunked_reader +class chunked_reader // master chunked_reader { protected: std::string filename; @@ -145,18 +145,20 @@ class Context // master chunked_reader public: void TransitionTo(State *); // @todo rename to set_compression_type - Context(const char *) ; - + chunked_reader(const char *) ; + ~chunked_reader(); + State* find_state(); const std::type_info& typeid_state(); - const std::string& get_filename(); char* get_buffer(); + compression_type get_filetype(); + void fopen(off_t); size_t cache_buffer(); - size_t read(char *, size_t); + size_t read(unsigned char *, size_t); void seek(off_t); size_t tell(); size_t get_file_i(); @@ -164,6 +166,5 @@ class Context // master chunked_reader - #endif diff --git a/include/flags.hpp b/include/flags.hpp index bc5b361b..38a9a93a 100644 --- a/include/flags.hpp +++ b/include/flags.hpp @@ -51,7 +51,7 @@ class twobit_flag bool get_flag(unsigned char); public: - void set(char *); + void set(unsigned char *); std::array &get_bits(void); // get bit 0 or bit 1 - needed for exporting flags to file(s) }; diff --git a/include/utils.hpp b/include/utils.hpp index e5751f6f..b8a62b5d 100644 --- a/include/utils.hpp +++ b/include/utils.hpp @@ -1,13 +1,13 @@ -uint32_t fourbytes_to_uint(char *, unsigned char); -uint32_t fourbytes_to_uint_ucsc2bit(char *, unsigned char); +uint32_t fourbytes_to_uint(unsigned char *, unsigned char); +uint32_t fourbytes_to_uint_ucsc2bit(unsigned char *, unsigned char); // for flags uint16_t twobytes_to_uint(char *); void uint_to_twobytes(char *chars, uint16_t n); size_t remove_chars(char 
*s, int c, size_t l);// to remove - characters from string -void uint_to_fourbytes(char *, uint32_t); +void uint_to_fourbytes(unsigned char *, uint32_t); void uint_to_fourbytes_ucsc2bit(char *, uint32_t); char *human_readable_fs(uint32_t, char *); diff --git a/meson.build b/meson.build index 615879a5..5231aba4 100644 --- a/meson.build +++ b/meson.build @@ -30,7 +30,16 @@ configuration_inc = include_directories('include') src = [ './dependencies/zstd-lib-common/xxhash.cpp', './dependencies/zstd-seekable-adapted/zstdseek_compress.cpp', './dependencies/zstd-seekable-adapted/zstdseek_decompress.cpp', './dependencies/zstd-seekable-adapted/zstdseek_utils.cpp', -'./src/chunked_reader.cpp', './src/database.cpp', './src/fastafs.cpp', './src/fasta_to_fastafs.cpp', './src/fivebit_fivebytes.cpp', './src/flags.cpp', './src/fourbit_byte.cpp', './src/fuse.cpp', './src/sequence_region.cpp', './src/twobit_byte.cpp', './src/ucsc2bit.cpp', './src/ucsc2bit_to_fastafs.cpp', './src/utils.cpp', './src/lsfastafs.cpp', './src/main.cpp'] +'./src/chunked_reader.cpp', './src/database.cpp', './src/fastafs.cpp', './src/fasta_to_fastafs.cpp', './src/xbit_byte_encoder.cpp', './src/fivebit_fivebytes.cpp', './src/flags.cpp', './src/fourbit_byte.cpp', './src/fuse.cpp', './src/sequence_region.cpp', './src/twobit_byte.cpp', './src/ucsc2bit.cpp', './src/ucsc2bit_to_fastafs.cpp', './src/utils.cpp', './src/lsfastafs.cpp', + './src/main.cpp' + ] + +src2 = [ +'./dependencies/zstd-lib-common/xxhash.cpp', +'./dependencies/zstd-seekable-adapted/zstdseek_compress.cpp', './dependencies/zstd-seekable-adapted/zstdseek_decompress.cpp', './dependencies/zstd-seekable-adapted/zstdseek_utils.cpp', +'./src/chunked_reader.cpp', './src/database.cpp', './src/fastafs.cpp', './src/fasta_to_fastafs.cpp', './src/xbit_byte_encoder.cpp', './src/fivebit_fivebytes.cpp', './src/flags.cpp', './src/fourbit_byte.cpp', './src/fuse.cpp', './src/sequence_region.cpp', './src/twobit_byte.cpp', './src/ucsc2bit.cpp', 
'./src/ucsc2bit_to_fastafs.cpp', './src/utils.cpp', './src/lsfastafs.cpp', + './test/cache/test_cache.cpp' + ] incdir = include_directories('dependencies/zstd-seekable-adapted', './dependencies/zstd-lib-common', 'include') @@ -46,3 +55,7 @@ executable('fastafs', src, include_directories : incdir, dependencies: [crypto, openssl, fuse, zlib, zstd]) +executable('test_cache_meson', src2, + include_directories : incdir, + dependencies: [crypto, openssl, fuse, zlib, zstd]) + diff --git a/src/chunked_reader.cpp b/src/chunked_reader.cpp index ba4b8ec1..d95cdfaa 100644 --- a/src/chunked_reader.cpp +++ b/src/chunked_reader.cpp @@ -3,7 +3,7 @@ -chunked_reader::chunked_reader(char * afilename) : +chunked_reader_old::chunked_reader_old(char * afilename) : fh_flat(nullptr), fh_zstd(nullptr), buffer_i(0), buffer_n(0), file_i(0) { @@ -11,14 +11,14 @@ chunked_reader::chunked_reader(char * afilename) : this->init(); } -chunked_reader::chunked_reader(const char * afilename) : +chunked_reader_old::chunked_reader_old(const char * afilename) : fh_flat(nullptr), fh_zstd(nullptr), buffer_i(0), buffer_n(0), file_i(0) { this->filename = realpath_cpp(afilename); this->init(); } -chunked_reader::~chunked_reader() +chunked_reader_old::~chunked_reader_old() { if(this->fh_flat != nullptr) { if(this->fh_flat->is_open()) { @@ -39,7 +39,7 @@ chunked_reader::~chunked_reader() } } -void chunked_reader::init() +void chunked_reader_old::init() { this->find_filetype(); @@ -53,24 +53,24 @@ void chunked_reader::init() this->fh_flat->seekg(0, std::ios::beg); this->update_flat_buffer(); } else { - throw std::runtime_error("[chunked_reader::init] Cannot open file for reading.\n"); + throw std::runtime_error("[chunked_reader_old::init] Cannot open file for reading.\n"); } break; case zstd: - //printf("[chunked_reader::init()] - init ZSTD_seekable_decompress_init_data* fh_zstd; \n"); + //printf("[chunked_reader_old::init()] - init ZSTD_seekable_decompress_init_data* fh_zstd; \n"); this->fh_zstd = 
ZSTD_seekable_decompressFile_init(this->filename.c_str()); // make zstd handle - to be implemented later on //ZSTD_seekable_decompress_data break; default: - throw std::runtime_error("[chunked_reader::init] Should never happen - but avoids compiler warning.\n"); + throw std::runtime_error("[chunked_reader_old::init] Should never happen - but avoids compiler warning.\n"); break; } } -void chunked_reader::find_filetype() +void chunked_reader_old::find_filetype() { if(is_zstd_file((const char*) this->filename.c_str())) { this->set_filetype(zstd); @@ -79,7 +79,7 @@ void chunked_reader::find_filetype() } } -void chunked_reader::set_filetype(compression_type arg_filetype) +void chunked_reader_old::set_filetype(compression_type arg_filetype) { this->filetype = arg_filetype; } @@ -88,10 +88,8 @@ void chunked_reader::set_filetype(compression_type arg_filetype) void set_filtetype(compression_type &filetype_arg); -size_t chunked_reader::read(char *arg_buffer, size_t buffer_size) +size_t chunked_reader_old::read(char *arg_buffer, size_t buffer_size) { - - buffer_size = std::min(buffer_size, (size_t) READ_BUFFER_SIZE); size_t written = 0; @@ -118,7 +116,7 @@ size_t chunked_reader::read(char *arg_buffer, size_t buffer_size) this->update_zstd_buffer(); break; default: - throw std::runtime_error("[chunked_reader::read] reading from uninitialized object\n"); + throw std::runtime_error("[chunked_reader_old::read] reading from uninitialized object\n"); break; } @@ -141,7 +139,7 @@ size_t chunked_reader::read(char *arg_buffer, size_t buffer_size) -size_t chunked_reader::read(unsigned char *arg_buffer, size_t buffer_size) +size_t chunked_reader_old::read(unsigned char *arg_buffer, size_t buffer_size) { @@ -163,7 +161,7 @@ size_t chunked_reader::read(unsigned char *arg_buffer, size_t buffer_size) this->update_zstd_buffer(); break; default: - throw std::runtime_error("[chunked_reader::read] reading from uninitialized object\n"); + throw std::runtime_error("[chunked_reader_old::read] 
reading from uninitialized object\n"); break; } @@ -179,7 +177,7 @@ size_t chunked_reader::read(unsigned char *arg_buffer, size_t buffer_size) // reads single byte from the buffer -unsigned char chunked_reader::read() +unsigned char chunked_reader_old::read() { if(this->buffer_i >= this->buffer_n) { switch(this->filetype) { @@ -190,7 +188,7 @@ unsigned char chunked_reader::read() this->update_zstd_buffer(); break; default: - throw std::runtime_error("[chunked_reader::read] reading from uninitialized object\n"); + throw std::runtime_error("[chunked_reader_old::read] reading from uninitialized object\n"); break; } } @@ -201,7 +199,7 @@ unsigned char chunked_reader::read() -void chunked_reader::update_flat_buffer() +void chunked_reader_old::update_flat_buffer() { this->fh_flat->read(this->buffer, READ_BUFFER_SIZE); @@ -211,7 +209,7 @@ void chunked_reader::update_flat_buffer() } -void chunked_reader::update_zstd_buffer() +void chunked_reader_old::update_zstd_buffer() { //size_t written = ZSTD_seekable_decompressFile_orDie(this->filename.c_str(), this->file_i, this->buffer, this->file_i + READ_BUFFER_SIZE); size_t written = ZSTD_seekable_decompressFile_orDie(this->fh_zstd, this->file_i, this->buffer, this->file_i + READ_BUFFER_SIZE); @@ -223,7 +221,7 @@ void chunked_reader::update_zstd_buffer() -void chunked_reader::seek(off_t offset) +void chunked_reader_old::seek(off_t offset) { this->file_i = offset; @@ -245,7 +243,7 @@ void chunked_reader::seek(off_t offset) } -size_t chunked_reader::tell() +size_t chunked_reader_old::tell() { //@todo decide what to return when out of bound //e.g. 
when exceeding file size @@ -255,13 +253,13 @@ size_t chunked_reader::tell() -void State::set_context(Context *arg_context) +void State::set_context(chunked_reader *arg_context) { this->context = arg_context; } // This does not read the actual flat file, this copies its internal buffer to arg_buffer_to -size_t State::read(char *arg_buffer_to, size_t arg_buffer_to_size, +size_t State::read(unsigned char *arg_buffer_to, size_t arg_buffer_to_size, size_t &buffer_i, size_t &buffer_n) { #if DEBUG @@ -273,7 +271,6 @@ size_t State::read(char *arg_buffer_to, size_t arg_buffer_to_size, size_t written = 0; const size_t n1 = std::min(buffer_n - buffer_i, arg_buffer_to_size);// number of characters to copy - printf("a. buffer_i = %i buffer_n = %i n1 = %i written = %i arg_buffer_to_size = %i\n", buffer_i, buffer_n , n1, written, arg_buffer_to_size); // copy current internal buffer completely while(written < n1) @@ -281,23 +278,15 @@ size_t State::read(char *arg_buffer_to, size_t arg_buffer_to_size, arg_buffer_to[written++] = this->context->get_buffer()[buffer_i++]; } - printf("b. buffer_i = %i buffer_n = %i n1 = %i written = %i arg_buffer_to_size = %i\n", buffer_i, buffer_n , n1, written, arg_buffer_to_size); if(written < arg_buffer_to_size) { - printf("true\n"); this->context->cache_buffer();// needs to set n to 0 - printf("c. buffer_i = %i buffer_n = %i n1 = %i written = %i arg_buffer_to_size = %i\n", buffer_i, buffer_n , n1, written, arg_buffer_to_size); while(buffer_i < buffer_n and written < arg_buffer_to_size) { arg_buffer_to[written++] = this->context->get_buffer()[buffer_i++]; } - printf("d. buffer_i = %i buffer_n = %i n1 = %i written = %i arg_buffer_to_size = %i\n", buffer_i, buffer_n , n1, written, arg_buffer_to_size); - - - //printf("recursively call another read :: %i\n", n2); } - printf("e. 
buffer_i = %i buffer_n = %i n1 = %i written = %i arg_buffer_to_size = %i\n", buffer_i, buffer_n , n1, written, arg_buffer_to_size); return written; @@ -307,22 +296,50 @@ size_t State::read(char *arg_buffer_to, size_t arg_buffer_to_size, -Context::Context(const char * arg_filename) : filename(arg_filename), buffer("\0"), buffer_i(0), buffer_n(0), file_i(0), state(nullptr) +chunked_reader::chunked_reader(const char * arg_filename) : filename(arg_filename), buffer("\0"), buffer_i(0), buffer_n(0), file_i(0), state(nullptr) { this->TransitionTo(this->find_state()); } -const std::string& Context::get_filename() +chunked_reader::~chunked_reader() +{ + delete this->state; +} + + + +const std::string& chunked_reader::get_filename() { return this->filename; } -char * Context::get_buffer() +char * chunked_reader::get_buffer() +{ + return this->buffer; +} + + +//@todo remove and use typeid only +compression_type chunked_reader::get_filetype() { - return &(this->buffer[0]); + if(this->typeid_state() == typeid(ContextUncompressed)) + { + return compression_type::uncompressed; + } + else if(this->typeid_state() == typeid(ContextZstdSeekable)) + { + return compression_type::zstd; + } + else + { + return compression_type::undefined; + } } -size_t Context::cache_buffer() + + + +size_t chunked_reader::cache_buffer() { size_t s = this->state->cache_buffer(); this->buffer_n = s; @@ -331,21 +348,29 @@ size_t Context::cache_buffer() this->file_i += s; } -size_t Context::read(char *arg_buffer, size_t arg_buffer_size) +size_t chunked_reader::read(unsigned char *arg_buffer, size_t arg_buffer_size) { //arg_buffer_size = std::min(arg_buffer_size, (size_t) READ_BUFFER_SIZE); #if DEBUG + + if(arg_buffer == nullptr) + { + throw std::runtime_error("[chunked_reader::read] Invalid / not allocated buffer.\n"); + } + if(arg_buffer_size > READ_BUFFER_SIZE) { - throw std::runtime_error("[ContextUncompressed::read] Requested buffer size larger than internal context buffer.\n"); + throw 
std::runtime_error("[chunked_reader::read] Requested buffer size larger than internal context buffer.\n"); } + #endif //DEBUG - + return this->state->read(arg_buffer, arg_buffer_size, this->buffer_i, this->buffer_n); + } -void Context::TransitionTo(State *arg_state) { +void chunked_reader::TransitionTo(State *arg_state) { if(this->state != nullptr) { @@ -356,14 +381,14 @@ void Context::TransitionTo(State *arg_state) { this->state->set_context(this); } -void Context::fopen(off_t file_offset) +void chunked_reader::fopen(off_t file_offset) { this->state->fopen(file_offset); // open file handle this->cache_buffer(); // read into buffer } -void Context::seek(off_t arg_offset) +void chunked_reader::seek(off_t arg_offset) { this->file_i = arg_offset; // @todo obtain return value from this->state->seek() and limit this this->state->seek(arg_offset);// set file pointer @@ -372,23 +397,23 @@ void Context::seek(off_t arg_offset) // positio in the (decompressed) file -size_t Context::tell() +size_t chunked_reader::tell() { - printf("Context :: tell: %i - %i + %i = %i\n", - this->file_i , - this->buffer_n , - this->buffer_i , - this->file_i - this->buffer_n + this->buffer_i); + //printf("Context :: tell: %i - %i + %i = %i\n", + //this->file_i , + //this->buffer_n , + //this->buffer_i , + //this->file_i - this->buffer_n + this->buffer_i); return this->file_i - this->buffer_n + this->buffer_i; } -size_t Context::get_file_i() +size_t chunked_reader::get_file_i() { return this->file_i; } -const std::type_info& Context::typeid_state() +const std::type_info& chunked_reader::typeid_state() { return typeid(*this->state); // somehow pointer is needed to return ContextSubvariant rather than State } @@ -396,7 +421,7 @@ const std::type_info& Context::typeid_state() -State *Context::find_state() +State *chunked_reader::find_state() { if(is_zstd_file(this->filename.c_str())) { @@ -411,6 +436,10 @@ State *Context::find_state() void ContextUncompressed::fopen(off_t start_pos = 0) { + 
if(this->fh == nullptr) + { + throw std::runtime_error("[ContextUncompressed::fopen] empty fh?.\n"); + } if(this->fh != nullptr) { throw std::runtime_error("[ContextUncompressed::fopen] opening a non closed reader.\n"); @@ -426,7 +455,7 @@ void ContextUncompressed::fopen(off_t start_pos = 0) } else { - throw std::runtime_error("[chunked_reader::init] Cannot open file for reading.\n"); + throw std::runtime_error("[chunked_reader_old::init] Cannot open file for reading.\n"); } } @@ -437,17 +466,33 @@ size_t ContextUncompressed::cache_buffer() { throw std::runtime_error("ContextUncompressed::cache_buffer\n"); } + + if(this->context->get_buffer() == nullptr) + { + throw std::runtime_error("ContextUncompressed::cache_buffer - no valid buffer?\n"); + } #endif //DEBUG this->fh->read(this->context->get_buffer(), READ_BUFFER_SIZE); size_t s = (size_t) this->fh->gcount(); + /*printf("context uncompressed cache_buffer: %i\n", (int) s); + printf("%02hhX %02hhX %02hhX %02hhX %02hhX %02hhX %02hhX %02hhX\n", + this->context->get_buffer()[0], + this->context->get_buffer()[1], + this->context->get_buffer()[2], + this->context->get_buffer()[3], + this->context->get_buffer()[4], + this->context->get_buffer()[5], + this->context->get_buffer()[6], + this->context->get_buffer()[7] + );*/ if(this->fh->eof()) { this->fh->clear(); this->fh->seekg(0, std::ios::end); } - + return s; } @@ -493,8 +538,6 @@ size_t ContextZstdSeekable::cache_buffer() // figure out the location in the decompressed file - printf("[%i] << file_i \n",this->context->get_file_i()); - size_t written = ZSTD_seekable_decompressFile_orDie( this->fh, this->context->get_file_i(), //this->context->file_i, @@ -568,5 +611,15 @@ void ContextZstdSeekable::seek(off_t arg_offset) ContextZstdSeekable::~ContextZstdSeekable() { - throw std::runtime_error("[ContextUncompressed::~ContextUncompressed] not implemented.\n"); + if(this->fh != nullptr) { + //ZSTD_seekable_free(this->fh_zstd->seekable); + fclose_orDie(this->fh->fin); + + 
//delete this->fh_zstd->seekable; + //delete this->fh_zstd->fin; + + delete this->fh; + } + + //throw std::runtime_error("[ContextUncompressed::~ContextUncompressed] not implemented.\n"); } diff --git a/src/fasta_to_fastafs.cpp b/src/fasta_to_fastafs.cpp index 4b67d4b3..4d82ca49 100644 --- a/src/fasta_to_fastafs.cpp +++ b/src/fasta_to_fastafs.cpp @@ -111,7 +111,7 @@ void fasta_to_fastafs_seq::finish_sequence(std::ofstream &fh_fastafs) } #endif //DEBUG - char buffer[4 + 1]; + unsigned char buffer[4 + 1]; // (over)write number nucleotides std::streamoff index_file_position = fh_fastafs.tellp(); @@ -1347,7 +1347,7 @@ size_t fasta_to_fastafs(const std::string &fasta_file, const std::string &fastaf // write index/footer unsigned int index_file_position = (uint32_t) fh_fastafs.tellp(); - char buffer[4 + 1]; + unsigned char buffer[4 + 1]; uint_to_fourbytes(buffer, (uint32_t) index.size()); fh_fastafs.write(reinterpret_cast(&buffer), (size_t) 4); @@ -1402,7 +1402,7 @@ size_t fasta_to_fastafs(const std::string &fasta_file, const std::string &fastaf // close fastafs, calc crc32, re-open and save fh_fastafs.close(); uint32_t crc32c = file_crc32(fastafs_file, 4, written); - char byte_enc[5] = "\x00\x00\x00\x00"; + unsigned char byte_enc[5] = "\x00\x00\x00\x00"; uint_to_fourbytes(byte_enc, (uint32_t) crc32c); std::ofstream fh_fastafs2(fastafs_file.c_str(), std::ios::out | std::ios::binary | std::ios::app); if(fh_fastafs2.is_open()) { diff --git a/src/fastafs.cpp b/src/fastafs.cpp index e52b8b8f..e3ac4d08 100644 --- a/src/fastafs.cpp +++ b/src/fastafs.cpp @@ -639,24 +639,37 @@ fastafs::~fastafs() void fastafs::load(std::string afilename) { + printf("aa\n"); std::streampos size; - char *memblock; - + unsigned char *memblock; + printf("ab\n"); + chunked_reader fh_in = chunked_reader(afilename.c_str()); + printf("ac\n"); + { + printf("ad\n"); + fh_in.fopen(0); + printf("ae\n"); + this->filetype = fh_in.get_filetype(); + printf("af\n"); + + - memblock = new char [20 + 1]; //sha1 is 
20b + memblock = new unsigned char [20 + 1]; //sha1 is 20b // if a user can't compile this line, please replace it with C's // 'realpath' function and delete/free afterwards and send a PR //this->filename = std::filesystem::canonical(afilename);// this path must be absolute because if stuff gets send to FUSE, paths are relative to the FUSE process and probably systemd initialization this->filename = realpath_cpp(afilename); + size = (size_t) fh_in.read(memblock, 16); if(size < 16) { //file.close(); throw std::invalid_argument("Corrupt file: " + filename); } else { + fh_in.seek(0); uint32_t i; @@ -664,6 +677,7 @@ void fastafs::load(std::string afilename) fh_in.read(memblock, 14); memblock[16] = '\0'; + // check magic for(i = 0 ; i < 4; i++) { if(memblock[i] != FASTAFS_MAGIC[i]) { @@ -703,10 +717,10 @@ void fastafs::load(std::string afilename) // name size_t namesize = (unsigned char) memblock[0]; // cast to something that is large enough (> 128) //char name[namesize + 1]; - char *name = new char[namesize + 1]; + unsigned char *name = new unsigned char[namesize + 1]; fh_in.read(name, namesize); name[(unsigned char) memblock[0]] = '\0'; - s->name = std::string(name); + s->name = std::string(reinterpret_cast(name)); // set cursor and save sequence data position fh_in.read(memblock, 4); @@ -797,6 +811,7 @@ void fastafs::view_fasta(ffs2f_init* cache) //std::ifstream file(this->filename.c_str(), std::ios::in | std::ios::binary | std::ios::ate); //if(file.is_open()) { chunked_reader fh = chunked_reader(this->filename.c_str()); + fh.fopen(0); for(uint32_t i = 0; i < this->data.size(); i++) { this->data[i]->view_fasta(cache->sequences[i], fh); @@ -860,6 +875,7 @@ uint32_t fastafs::view_sequence_region(ffs2f_init* cache, const char *seq_region #endif chunked_reader fh = chunked_reader(this->filename.c_str()); + fh.fopen(0); //std::ifstream file(this->filename.c_str(), std::ios::in | std::ios::binary | std::ios::ate); //if(file.is_open()) { // parse "chr..:..-.." 
string @@ -891,10 +907,13 @@ uint32_t fastafs::view_sequence_region(ffs2f_init* cache, const char *seq_region */ uint32_t fastafs::view_fasta_chunk(ffs2f_init* cache, char *buffer, size_t buffer_size, off_t file_offset) { - chunked_reader fh = chunked_reader(this->filename.c_str()); + fh.fopen(0); + + uint32_t s = this->view_fasta_chunk(cache, buffer, buffer_size, file_offset, fh); + //#printf("%02hhX %02hhX %02hhX %02hhX\n", buffer[0], buffer[1], buffer[2], buffer[3]); - return this->view_fasta_chunk(cache, buffer, buffer_size, file_offset, fh); + return s; } @@ -953,6 +972,7 @@ uint32_t fastafs::view_ucsc2bit_chunk(char *buffer, size_t buffer_size, off_t fi //std::ifstream file(this->filename.c_str(), std::ios::in | std::ios::binary | std::ios::ate); //if(file.is_open()) { chunked_reader file = chunked_reader(this->filename.c_str()); + file.fopen(0); char n_seq[4]; pos_limit += 4;// skip this loop after writing first four bytes while(pos < pos_limit) { @@ -1675,12 +1695,12 @@ bool fastafs::check_file_integrity(bool verbose) { uint32_t crc32_current = this->get_crc32(); - char buf_old[5] = "\x00\x00\x00\x00"; + unsigned char buf_old[5] = "\x00\x00\x00\x00"; uint_to_fourbytes(buf_old, (uint32_t) this->crc32f); if(crc32_current != this->crc32f) { - char buf_new[5] = "\x00\x00\x00\x00"; + unsigned char buf_new[5] = "\x00\x00\x00\x00"; uint_to_fourbytes(buf_new, (uint32_t) crc32_current); if(verbose) { @@ -1726,6 +1746,7 @@ bool fastafs::check_sequence_integrity(bool verbose) ffs2f_init* cache = this->init_ffs2f(0, false);// do not use masking, this checksum requires capital / upper case nucleotides chunked_reader file = chunked_reader(this->filename.c_str()); + file.fopen(0); //std::ifstream file(this->filename.c_str(), std::ios::in | std::ios::binary | std::ios::ate); //if(file.is_open()) { for(uint32_t i = 0; i < this->data.size(); i++) { diff --git a/src/flags.cpp b/src/flags.cpp index 7a468c77..eb90e1e7 100644 --- a/src/flags.cpp +++ b/src/flags.cpp @@ -16,7 
+16,7 @@ twobit_flag::twobit_flag() -void twobit_flag::set(char *data) +void twobit_flag::set(unsigned char *data) { this->bits[0] = data[0]; this->bits[1] = data[1]; diff --git a/src/ucsc2bit.cpp b/src/ucsc2bit.cpp index 83ef6221..e4fbb812 100644 --- a/src/ucsc2bit.cpp +++ b/src/ucsc2bit.cpp @@ -246,7 +246,7 @@ void ucsc2bit::load(std::string afilename) file.close(); throw std::invalid_argument("Corrupt file: " + filename); } else { - char *memblock = new char [20 + 1]; // buffer + unsigned char *memblock = new unsigned char [20 + 1]; // buffer if(memblock == 0) { throw std::invalid_argument("Could not alloc\n"); } @@ -256,7 +256,7 @@ void ucsc2bit::load(std::string afilename) uint32_t i; // HEADER - if(!file.read(memblock, 16)) { + if(!file.read((char *) &memblock[0], 16)) { delete[] memblock; throw std::invalid_argument("Corrupt, unreadable or truncated file (early EOF): " + filename); } @@ -293,7 +293,7 @@ void ucsc2bit::load(std::string afilename) s = new ucsc2bit_seq; // name length - if(!file.read(memblock, 1)) { + if(!file.read((char *) &memblock[0], 1)) { delete[] memblock; throw std::invalid_argument("Corrupt, unreadable or truncated file (early EOF): " + filename); } @@ -310,7 +310,7 @@ void ucsc2bit::load(std::string afilename) s->name = std::string(name); // file offset for seq-block - if(!file.read(memblock, 4)) { + if(!file.read((char *) &memblock[0], 4)) { delete[] memblock; throw std::invalid_argument("Corrupt, unreadable or truncated file (early EOF): " + filename); } @@ -325,11 +325,11 @@ void ucsc2bit::load(std::string afilename) s = data[i]; file.seekg(s->data_position, std::ios::beg); - file.read(memblock, 4); + file.read((char *) &memblock[0], 4); s->n = fourbytes_to_uint_ucsc2bit(memblock, 0); // n blocks - if(!file.read(memblock, 4)) { + if(!file.read((char *) &memblock[0], 4)) { delete[] memblock; throw std::invalid_argument("Corrupt, unreadable or truncated file (early EOF): " + filename); } @@ -337,7 +337,7 @@ void 
ucsc2bit::load(std::string afilename) s->n_starts.resize(n_blocks); s->n_ends.resize(n_blocks); for(j = 0; j < n_blocks; j++) { - file.read(memblock, 8); + file.read((char *) &memblock[0], 8); uint32_t n_block_s = fourbytes_to_uint_ucsc2bit(memblock, 0); s->n_starts[j] = n_block_s; @@ -345,7 +345,7 @@ void ucsc2bit::load(std::string afilename) } // m blocks - if(!file.read(memblock, 4)) { + if(!file.read((char *) &memblock[0], 4)) { delete[] memblock; throw std::invalid_argument("Corrupt, unreadable or truncated file (early EOF): " + filename); } @@ -353,7 +353,7 @@ void ucsc2bit::load(std::string afilename) s->m_starts.resize(m_blocks); s->m_ends.resize(m_blocks); for(j = 0; j < m_blocks; j++) { - file.read(memblock, 8); + file.read((char *) &memblock[0], 8); uint32_t m_block_s = fourbytes_to_uint_ucsc2bit(memblock, 0); s->m_starts[j] = m_block_s; diff --git a/src/ucsc2bit_to_fastafs.cpp b/src/ucsc2bit_to_fastafs.cpp index abaa4822..a108da11 100644 --- a/src/ucsc2bit_to_fastafs.cpp +++ b/src/ucsc2bit_to_fastafs.cpp @@ -26,7 +26,7 @@ size_t ucsc2bit_to_fastafs(std::string ucsc2bit_file, std::string fastafs_file) const char ng[2] = "G"; const char nn[2] = "N"; - char buffer[16 + 1]; + unsigned char buffer[16 + 1]; fastafs fs_new = fastafs(""); uint32_t i, j, n; @@ -51,7 +51,7 @@ size_t ucsc2bit_to_fastafs(std::string ucsc2bit_file, std::string fastafs_file) fh_fastafs << "\x00\x00\x00\x00"s;// position of metedata ~ unknown YET // Read UCSC2bit header (n seq) - fh_ucsc2bit.read(buffer, 12); + fh_ucsc2bit.read( (char*)( &buffer[0]) , 12);//conversion from unsigned char* to char* (https://stackoverflow.com/questions/604431/c-reading-unsigned-char-from-file-stream) n = fourbytes_to_uint_ucsc2bit(buffer, 8); uint_to_fourbytes(buffer, n); std::vector data(n); @@ -66,15 +66,15 @@ size_t ucsc2bit_to_fastafs(std::string ucsc2bit_file, std::string fastafs_file) data[i] = s; data2[i] = t; - fh_ucsc2bit.read(buffer, 1); + fh_ucsc2bit.read((char*)&buffer[0], 1); s->name_size = 
buffer[0]; - fh_ucsc2bit.read(buffer, s->name_size); + fh_ucsc2bit.read((char*)&buffer[0], s->name_size); s->name = new char[s->name_size + 1]; - strncpy(s->name, buffer, s->name_size); + strncpy(s->name, (char*)&buffer[0], s->name_size); s->name[s->name_size] = '\0'; - fh_ucsc2bit.read(buffer, 4); + fh_ucsc2bit.read((char*)&buffer[0], 4); s->offset = fourbytes_to_uint_ucsc2bit(buffer, 0); } for(i = 0 ; i < n; i ++) { @@ -84,31 +84,31 @@ size_t ucsc2bit_to_fastafs(std::string ucsc2bit_file, std::string fastafs_file) t = data2[i]; t->file_offset_dna_in_ucsc2bit = fh_fastafs.tellp(); - fh_ucsc2bit.read(buffer, 4); + fh_ucsc2bit.read((char*)&buffer[0], 4); s->dna_size = fourbytes_to_uint_ucsc2bit(buffer, 0); // parse N blocks - fh_ucsc2bit.read(buffer, 4); + fh_ucsc2bit.read((char*)&buffer[0], 4); s->n_blocks = fourbytes_to_uint_ucsc2bit(buffer, 0); for(j = 0; j < s->n_blocks; j++) { - fh_ucsc2bit.read(buffer, 4); + fh_ucsc2bit.read((char*)&buffer[0], 4); s->n_block_starts.push_back(fourbytes_to_uint_ucsc2bit(buffer, 0)); } for(j = 0; j < s->n_blocks; j++) { - fh_ucsc2bit.read(buffer, 4); + fh_ucsc2bit.read((char*)&buffer[0], 4); s->n_block_sizes.push_back(fourbytes_to_uint_ucsc2bit(buffer, 0)); t->N += s->n_block_sizes.back();//ucsc2bit provides lengths } // parse M blocks - fh_ucsc2bit.read(buffer, 4); + fh_ucsc2bit.read((char*)&buffer[0], 4); s->m_blocks = fourbytes_to_uint_ucsc2bit(buffer, 0); for(j = 0; j < s->m_blocks; j++) { - fh_ucsc2bit.read(buffer, 4); + fh_ucsc2bit.read((char*)&buffer[0], 4); s->m_block_starts.push_back(fourbytes_to_uint_ucsc2bit(buffer, 0)); } for(j = 0; j < s->m_blocks; j++) { - fh_ucsc2bit.read(buffer, 4); + fh_ucsc2bit.read((char*)&buffer[0], 4); s->m_block_sizes.push_back(fourbytes_to_uint_ucsc2bit(buffer, 0)); } @@ -117,7 +117,7 @@ size_t ucsc2bit_to_fastafs(std::string ucsc2bit_file, std::string fastafs_file) fh_fastafs.write(reinterpret_cast(&buffer), (size_t) 4); // parse and convert sequence - fh_ucsc2bit.read(buffer, 4); + 
fh_ucsc2bit.read((char*)&buffer[0], 4); twobit_byte t_in = twobit_byte(ENCODE_HASH_TWOBIT_DNA); const char *decoded_in = t_in.encode_hash[0];// unnecessary initialization but otherwise gcc whines twobit_byte t_out = twobit_byte(ENCODE_HASH_TWOBIT_DNA); @@ -133,7 +133,7 @@ size_t ucsc2bit_to_fastafs(std::string ucsc2bit_file, std::string fastafs_file) } for(j = 0; j < s->dna_size; j++) { if(j % 4 == 0) { - fh_ucsc2bit.read(buffer, 1); + fh_ucsc2bit.read((char*)&buffer[0], 1); t_in.data = buffer[0]; decoded_in = t_in.get();// pointer to the right value? } @@ -272,7 +272,7 @@ size_t ucsc2bit_to_fastafs(std::string ucsc2bit_file, std::string fastafs_file) f.load(fastafs_file); uint32_t crc32c = f.get_crc32(); - char byte_enc[5] = "\x00\x00\x00\x00"; + unsigned char byte_enc[5] = "\x00\x00\x00\x00"; uint_to_fourbytes(byte_enc, (uint32_t) crc32c); //printf("[%i][%i][%i][%i] input!! \n", byte_enc[0], byte_enc[1], byte_enc[2], byte_enc[3]); fh_fastafs.write(reinterpret_cast(&byte_enc), (size_t) 4); diff --git a/src/utils.cpp b/src/utils.cpp index 3ecb5c85..c85ac611 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -14,7 +14,7 @@ #include "config.hpp" // as these chars are coming from ifstream.read -uint32_t fourbytes_to_uint(char *chars, unsigned char offset) +uint32_t fourbytes_to_uint(unsigned char *chars, unsigned char offset) { uint32_t u = ((unsigned char) chars[0 + offset] << 24) | ((unsigned char) chars[1 + offset] << 16) | ((unsigned char) chars[2 + offset] << 8) | ((unsigned char) chars[3 + offset]); return u; @@ -36,7 +36,7 @@ the equivalent of 129 in their encoding is as follows: The function below decodes these 4 charred strings into an uint32_teger */ -uint32_t fourbytes_to_uint_ucsc2bit(char *chars, unsigned char offset) +uint32_t fourbytes_to_uint_ucsc2bit(unsigned char *chars, unsigned char offset) { uint32_t u = ((unsigned char) chars[3 + offset] << 24) | ((unsigned char) chars[2 + offset] << 16) | ((unsigned char) chars[1 + offset] << 8) | ((unsigned 
char) chars[0 + offset]); @@ -84,7 +84,7 @@ size_t remove_chars(char *s, int c, size_t l) } -void uint_to_fourbytes(char *chars, uint32_t n) +void uint_to_fourbytes(unsigned char *chars, uint32_t n) { chars[0] = (signed char)((n >> 24) & 0xFF); chars[1] = (signed char)((n >> 16) & 0xFF); diff --git a/src/xbit_byte_encoder.cpp b/src/xbit_byte_encoder.cpp index 90e9955d..29384592 100644 --- a/src/xbit_byte_encoder.cpp +++ b/src/xbit_byte_encoder.cpp @@ -9,6 +9,10 @@ void xbit_byte_encoder::next(chunked_reader &r) { - this->data = r.read(); + unsigned char *buf = new unsigned char[2]; + r.read(buf, 1); + this->data = buf[0]; + + delete[] buf; } diff --git a/test/cache/test_cache.cpp b/test/cache/test_cache.cpp index 97785a7c..b1acee0b 100644 --- a/test/cache/test_cache.cpp +++ b/test/cache/test_cache.cpp @@ -231,136 +231,142 @@ BOOST_AUTO_TEST_CASE(Test_size) */ BOOST_AUTO_TEST_CASE(test_cache) { - size_t written = fasta_to_fastafs("test/data/test.fa", "tmp/test_cache_test.fastafs", false); - - static std::string reference = - // GENERIC-HEADER - "\x0F\x0A\x46\x53"s// [0, 3] - "\x00\x00\x00\x00"s// [4, 7] version - "\x80\x00"s// [8, 9] FASTAFS flag [ 10000000 | 00000000 ] - "\x00\x00\x01\x37"s // [10, 13] index position in file (153) - - // DATA - "\x00\x00\x00\x10"s// [14, 17] seq length (16) (of 2bit encoded bytes; n-blocks are excluded) - "\x00\x55\xAA\xFF"s// [18, 21] sequence - "\x00\x00\x00\x00"s// [22, 25] n-blocks (0) - "\x75\x25\x5C\x6D\x90\x77\x89\x99\xAD\x36\x43\xA2\xE6\x9D\x43\x44"s// [26, 41] checksum - "\x00\x00\x00\x01"s// [42, ] m-blocks (1) - "\x00\x00\x00\x00"s// [50, 53] m-block starts (0) - "\x00\x00\x00\x0F"s// [54, 57] m-block starts (15) - "\x00\x00\x00\x0C"s// [58, 61] seq length (12) (of 2bit encoded bytes; n-blocks are excluded) - "\x93\x93\x93"s// [62, 64] sequence: ACTG ACTG nnnn ACTG = 10010011 10010011 00000000 10010011 = \x93 \x93 \x00 \x93 - "\x00\x00\x00\x01"s// [65, 68] n-blocks (1) - "\x00\x00\x00\x08"s// [69, 72] n-block start[1] 
(08) - "\x00\x00\x00\x0B"s// [73, 76] n-block ends[1] (11) - "\x8B\x56\x73\x72\x4A\x99\x65\xC2\x9A\x1D\x76\xFE\x70\x31\xAC\x8A"s// [69, 96] checksum - "\x00\x00\x00\x01"s// [97, 100] m-blocks (0) - "\x00\x00\x00\x08"s// [101, 104] m-block starts (8) - "\x00\x00\x00\x0B"s// [105, 108] m-block starts (11) - "\x00\x00\x00\x0D"s// [109, 112] seq length (13) (needs to become 2bit-encoded seq-len) - "\x93\x93\xAA\x40"s// [113, 116] sequence: last one is 01 00 00 00 - "\x00\x00\x00\x00"s// [117, 120] n-blocks (0) - "\x61\xDE\xBA\x32\xEC\x4C\x35\x76\xE3\x99\x8F\xA2\xD4\xB8\x72\x88"s// [121, 140] checksum - "\x00\x00\x00\x01"s// [141, 144] m-blocks (0) - "\x00\x00\x00\x08"s// [145, 148] m-block starts (8) - "\x00\x00\x00\x0C"s// [149, 152] m-block starts (12) - "\x00\x00\x00\x0E"s// [153, 156] seq length (14) (of 2bit encoded bytes; n-blocks are excluded) - "\x93\x93\xAA\x50"s// [157, 160] last one is 01 01 00 00 - "\x00\x00\x00\x00"s// [161, 164] n-bocks (0) - "\x99\xB9\x05\x60\xF2\x3C\x1B\xDA\x28\x71\xA6\xC9\x3F\xD6\xA2\x40"s// [165, 184] checksum - "\x00\x00\x00\x01"s// [185, 188] m-blocks (0) - "\x00\x00\x00\x08"s// [189, 192] m-block starts (8) - "\x00\x00\x00\x0D"s// [193, 196] m-block starts (13) - "\x00\x00\x00\x0F"s// [197, 200] seq length (15) (of 2bit encoded nucleotides; n-blocks are excluded) - "\x93\x93\xAA\x54"s// [201, 204] last one is 01 01 01 00 - "\x00\x00\x00\x00"s// [205, 208] n-blocks (0) - "\x36\x25\xAF\xDF\xBE\xB4\x37\x65\xB8\x5F\x61\x2E\x0A\xCB\x47\x39"s// [209, 228] checksum - "\x00\x00\x00\x01"s// [229, 232] m-blocks (0) - "\x00\x00\x00\x08"s// [233, 236] m-block starts (8) - "\x00\x00\x00\x0E"s// [237, 240] m-block starts (14) - "\x00\x00\x00\x04"s// [241, 244] seq length (4) (of 2bit encoded nucleotides; n-blocks are excluded) - "\x93"s// [245, 245] sequence: ACTG NNNN = 10010011 00000000 - "\x00\x00\x00\x01"s// [246, 249] n-blocks (1) - "\x00\x00\x00\x04"s// [250, 253] n-starts [1] (4) - "\x00\x00\x00\x07"s// [254, 257] n-ends [1] (7) - 
"\xBD\x8C\x08\x0E\xD2\x5B\xA8\xA4\x54\xD9\x43\x4C\xB8\xD1\x4A\x68"s// [258, 277] checksum - "\x00\x00\x00\x01"s// [278, 281] m-blocks (0) - "\x00\x00\x00\x04"s// [282, 285] m-block starts (4) - "\x00\x00\x00\x07"s// [286, 289] m-block starts (7) - "\x00\x00\x00\x04"s// [290, 293] seq length (4) (of 2bit encoded nucleotides; n-blocks are excluded) - "\x93"s// [294, 294] sequence: NNAC TG?? = 00001001 00110000 - "\x00\x00\x00\x01"s// [295, 298] n-blocks (1) - "\x00\x00\x00\x00"s// [299, 302] n-starts[1] (0) - "\x00\x00\x00\x01"s// [303, 306] n-ends[1] (1) - "\x98\x0E\xF3\xA1\xCD\x80\xAF\xEC\x95\x9D\xCF\x85\x2D\x02\x62\x46"s// [307, 326] checksum - "\x00\x00\x00\x01"s// [327, 330] m-blocks (0) - "\x00\x00\x00\x00"s// [331, 334] m-block starts (0) - "\x00\x00\x00\x01"s// [335, 338] m-block starts (1) - - // INDEX - "\x00\x00\x00\x07"s // [339, 342] 7 sequences - "\x010\x00" // [343, 344] complete, DNA and not circular - "\x04"s "chr1"s // [345, 349] name - "\x00\x00\x00\x0E"s // [350, 353] data position in file (14) - "\x010\x00" // [354, 355] complete, DNA and not circular - "\x04"s "chr2"s // [356, 360] name - "\x00\x00\x00\x36"s // [361, 364] data position in file (54) - "\x010\x00" // [, ] complete, DNA and not circular - "\x06"s "chr3.1"s // [, ] name - "\x00\x00\x00\x65"s // [, ] data position in file (101) - "\x010\x00" // [, ] complete, DNA and not circular - "\x06"s "chr3.2"s // [, ] name - "\x00\x00\x00\x8D"s // [, ] data position in file (141) - "\x010\x00" // [, ] complete, DNA and not circular - "\x06"s "chr3.3"s // [, ] name - "\x00\x00\x00\xB5"s // [, ] data position in file (181) - "\x010\x00" // [, ] complete, DNA and not circular - "\x04"s "chr4"s // [, ] name - "\x00\x00\x00\xDD"s // [, ] data position in file (221) - "\x010\x00" // [, ] complete, DNA and not circular - "\x04"s "chr5"s // [, ] name - "\x00\x00\x01\x0A"s // [, ] data position in file (290) - - // METADATA - "\x00"s // [399] no metadata fields [padding will come soon?] 
- - // CRC32 checksums - "\x1e\x77\x77\x22"s - ; - - BOOST_CHECK_EQUAL(written, 403); - - //BOOST_CHECK(output.compare(uppercase) == 0 or output.compare(mixedcase) == 0); - std::ifstream file("tmp/test_cache_test.fastafs", std::ios::in | std::ios::binary | std::ios::ate); - BOOST_REQUIRE(file.is_open()); - - std::streampos size; - char * buffer; - size = file.tellg(); - buffer = new char [size]; - - file.seekg(0, std::ios::beg); - file.read(buffer, size); - file.close(); - for(unsigned int i = 0; i < size; i++) { - BOOST_CHECK_EQUAL(buffer[i], reference[i]); + { + size_t written = fasta_to_fastafs("test/data/test.fa", "tmp/test_cache_test.fastafs", false); + BOOST_CHECK_EQUAL(written, 403); + + static std::string reference = + // GENERIC-HEADER + "\x0F\x0A\x46\x53"s// [0, 3] + "\x00\x00\x00\x00"s// [4, 7] version + "\x80\x00"s// [8, 9] FASTAFS flag [ 10000000 | 00000000 ] + "\x00\x00\x01\x37"s // [10, 13] index position in file (153) + + // DATA + "\x00\x00\x00\x10"s// [14, 17] seq length (16) (of 2bit encoded bytes; n-blocks are excluded) + "\x00\x55\xAA\xFF"s// [18, 21] sequence + "\x00\x00\x00\x00"s// [22, 25] n-blocks (0) + "\x75\x25\x5C\x6D\x90\x77\x89\x99\xAD\x36\x43\xA2\xE6\x9D\x43\x44"s// [26, 41] checksum + "\x00\x00\x00\x01"s// [42, ] m-blocks (1) + "\x00\x00\x00\x00"s// [50, 53] m-block starts (0) + "\x00\x00\x00\x0F"s// [54, 57] m-block starts (15) + "\x00\x00\x00\x0C"s// [58, 61] seq length (12) (of 2bit encoded bytes; n-blocks are excluded) + "\x93\x93\x93"s// [62, 64] sequence: ACTG ACTG nnnn ACTG = 10010011 10010011 00000000 10010011 = \x93 \x93 \x00 \x93 + "\x00\x00\x00\x01"s// [65, 68] n-blocks (1) + "\x00\x00\x00\x08"s// [69, 72] n-block start[1] (08) + "\x00\x00\x00\x0B"s// [73, 76] n-block ends[1] (11) + "\x8B\x56\x73\x72\x4A\x99\x65\xC2\x9A\x1D\x76\xFE\x70\x31\xAC\x8A"s// [69, 96] checksum + "\x00\x00\x00\x01"s// [97, 100] m-blocks (0) + "\x00\x00\x00\x08"s// [101, 104] m-block starts (8) + "\x00\x00\x00\x0B"s// [105, 108] m-block starts (11) 
+ "\x00\x00\x00\x0D"s// [109, 112] seq length (13) (needs to become 2bit-encoded seq-len) + "\x93\x93\xAA\x40"s// [113, 116] sequence: last one is 01 00 00 00 + "\x00\x00\x00\x00"s// [117, 120] n-blocks (0) + "\x61\xDE\xBA\x32\xEC\x4C\x35\x76\xE3\x99\x8F\xA2\xD4\xB8\x72\x88"s// [121, 140] checksum + "\x00\x00\x00\x01"s// [141, 144] m-blocks (0) + "\x00\x00\x00\x08"s// [145, 148] m-block starts (8) + "\x00\x00\x00\x0C"s// [149, 152] m-block starts (12) + "\x00\x00\x00\x0E"s// [153, 156] seq length (14) (of 2bit encoded bytes; n-blocks are excluded) + "\x93\x93\xAA\x50"s// [157, 160] last one is 01 01 00 00 + "\x00\x00\x00\x00"s// [161, 164] n-bocks (0) + "\x99\xB9\x05\x60\xF2\x3C\x1B\xDA\x28\x71\xA6\xC9\x3F\xD6\xA2\x40"s// [165, 184] checksum + "\x00\x00\x00\x01"s// [185, 188] m-blocks (0) + "\x00\x00\x00\x08"s// [189, 192] m-block starts (8) + "\x00\x00\x00\x0D"s// [193, 196] m-block starts (13) + "\x00\x00\x00\x0F"s// [197, 200] seq length (15) (of 2bit encoded nucleotides; n-blocks are excluded) + "\x93\x93\xAA\x54"s// [201, 204] last one is 01 01 01 00 + "\x00\x00\x00\x00"s// [205, 208] n-blocks (0) + "\x36\x25\xAF\xDF\xBE\xB4\x37\x65\xB8\x5F\x61\x2E\x0A\xCB\x47\x39"s// [209, 228] checksum + "\x00\x00\x00\x01"s// [229, 232] m-blocks (0) + "\x00\x00\x00\x08"s// [233, 236] m-block starts (8) + "\x00\x00\x00\x0E"s// [237, 240] m-block starts (14) + "\x00\x00\x00\x04"s// [241, 244] seq length (4) (of 2bit encoded nucleotides; n-blocks are excluded) + "\x93"s// [245, 245] sequence: ACTG NNNN = 10010011 00000000 + "\x00\x00\x00\x01"s// [246, 249] n-blocks (1) + "\x00\x00\x00\x04"s// [250, 253] n-starts [1] (4) + "\x00\x00\x00\x07"s// [254, 257] n-ends [1] (7) + "\xBD\x8C\x08\x0E\xD2\x5B\xA8\xA4\x54\xD9\x43\x4C\xB8\xD1\x4A\x68"s// [258, 277] checksum + "\x00\x00\x00\x01"s// [278, 281] m-blocks (0) + "\x00\x00\x00\x04"s// [282, 285] m-block starts (4) + "\x00\x00\x00\x07"s// [286, 289] m-block starts (7) + "\x00\x00\x00\x04"s// [290, 293] seq length (4) (of 2bit encoded 
nucleotides; n-blocks are excluded) + "\x93"s// [294, 294] sequence: NNAC TG?? = 00001001 00110000 + "\x00\x00\x00\x01"s// [295, 298] n-blocks (1) + "\x00\x00\x00\x00"s// [299, 302] n-starts[1] (0) + "\x00\x00\x00\x01"s// [303, 306] n-ends[1] (1) + "\x98\x0E\xF3\xA1\xCD\x80\xAF\xEC\x95\x9D\xCF\x85\x2D\x02\x62\x46"s// [307, 326] checksum + "\x00\x00\x00\x01"s// [327, 330] m-blocks (0) + "\x00\x00\x00\x00"s// [331, 334] m-block starts (0) + "\x00\x00\x00\x01"s// [335, 338] m-block starts (1) + + // INDEX + "\x00\x00\x00\x07"s // [339, 342] 7 sequences + "\x010\x00" // [343, 344] complete, DNA and not circular + "\x04"s "chr1"s // [345, 349] name + "\x00\x00\x00\x0E"s // [350, 353] data position in file (14) + "\x010\x00" // [354, 355] complete, DNA and not circular + "\x04"s "chr2"s // [356, 360] name + "\x00\x00\x00\x36"s // [361, 364] data position in file (54) + "\x010\x00" // [, ] complete, DNA and not circular + "\x06"s "chr3.1"s // [, ] name + "\x00\x00\x00\x65"s // [, ] data position in file (101) + "\x010\x00" // [, ] complete, DNA and not circular + "\x06"s "chr3.2"s // [, ] name + "\x00\x00\x00\x8D"s // [, ] data position in file (141) + "\x010\x00" // [, ] complete, DNA and not circular + "\x06"s "chr3.3"s // [, ] name + "\x00\x00\x00\xB5"s // [, ] data position in file (181) + "\x010\x00" // [, ] complete, DNA and not circular + "\x04"s "chr4"s // [, ] name + "\x00\x00\x00\xDD"s // [, ] data position in file (221) + "\x010\x00" // [, ] complete, DNA and not circular + "\x04"s "chr5"s // [, ] name + "\x00\x00\x01\x0A"s // [, ] data position in file (290) + + // METADATA + "\x00"s // [399] no metadata fields [padding will come soon?] 
+ + // CRC32 checksums + "\x1e\x77\x77\x22"s + ; + + BOOST_REQUIRE_EQUAL(reference.length(), 403); + + //BOOST_CHECK(output.compare(uppercase) == 0 or output.compare(mixedcase) == 0); + std::ifstream file("tmp/test_cache_test.fastafs", std::ios::in | std::ios::binary | std::ios::ate); + BOOST_REQUIRE(file.is_open()); + + std::streampos size = file.tellg(); + char *buffer = new char[size]; + BOOST_REQUIRE(buffer != nullptr); + + file.seekg(0, std::ios::beg); + file.read(buffer, size); + file.close(); + for(unsigned int i = 0; i < size; i++) { + BOOST_CHECK_EQUAL(buffer[i], reference[i]); + /* + printf("comparing char %i\n", i); + if(reference[i] != buffer[i]) { + printf(" ** mismatch [%d] [ref] %d != [buf] %d (%c x %02hhX)\n", i, reference[i], buffer[i], buffer[i], buffer[i]); + } + */ - /* - printf("comparing char %i\n", i); - if(reference[i] != buffer[i]) { - printf(" ** mismatch [%d] [ref] %d != [buf] %d (%c x %02hhX)\n", i, reference[i], buffer[i], buffer[i], buffer[i]); - } - */ + } + delete[] buffer; } - delete[] buffer; - - - // check computed file size - fastafs f = fastafs(""); - f.load("tmp/test_cache_test.fastafs"); - BOOST_CHECK_EQUAL(f.fastafs_filesize(), 403); + { + // check computed file size + printf("test0\n"); + fastafs f = fastafs(""); + printf("test1\n"); + f.load("tmp/test_cache_test.fastafs"); + printf("test2\n"); + BOOST_CHECK_EQUAL(f.fastafs_filesize(), 403); + printf("test3\n"); + } } @@ -373,6 +379,7 @@ BOOST_AUTO_TEST_CASE(test_cache) */ BOOST_AUTO_TEST_CASE(test_cache_forwards_backwards) { + printf("test4\n"); // generate FASTAFS file from FASTA file fasta_to_fastafs("test/data/test.fa", "tmp/test_cache_test.fastafs", false); diff --git a/test/chunked_reader/test_chunked_reader.cpp b/test/chunked_reader/test_chunked_reader.cpp index faef0b2e..ba099ddd 100644 --- a/test/chunked_reader/test_chunked_reader.cpp +++ b/test/chunked_reader/test_chunked_reader.cpp @@ -1,5 +1,5 @@ -#define BOOST_TEST_MODULE fastfs_test_chunked_reader +#define 
BOOST_TEST_MODULE fastfs_test_chunked_reader_old #include @@ -17,7 +17,7 @@ -void flush_buffer(char *buffer, size_t n, char fill) +void flush_buffer(unsigned char *buffer, size_t n, unsigned char fill) { for(size_t i = 0; i < n; i++) { buffer[i] = fill; @@ -30,7 +30,7 @@ BOOST_AUTO_TEST_SUITE(Testing) -BOOST_AUTO_TEST_CASE(test_chunked_reader__small_file) +BOOST_AUTO_TEST_CASE(test_chunked_reader_old__small_file) { std::string test_name = "test"; std::string fasta_file = "test/data/" + test_name + ".fa"; @@ -44,7 +44,7 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__small_file) (unsigned) ZSTD_SEEKABLE_FRAME_SIZE); - char buffer[READ_BUFFER_SIZE + 1]; + unsigned char buffer[READ_BUFFER_SIZE + 1]; flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); std::string std_buffer; buffer[1024] = '\0'; @@ -56,16 +56,16 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__small_file) { // old init - chunked_reader r_flat = chunked_reader(fastafs_file.c_str()); + chunked_reader_old r_flat = chunked_reader_old(fastafs_file.c_str()); // Context equivalent - uncompressed - Context c1(fastafs_file.c_str()); + chunked_reader c1(fastafs_file.c_str()); c1.fopen(0); BOOST_CHECK(c1.typeid_state() == typeid(ContextUncompressed)); BOOST_CHECK(c1.typeid_state() != typeid(ContextZstdSeekable)); // Context equivalent - compressed - Context c2(fastafs_file_zstd.c_str()); + chunked_reader c2(fastafs_file_zstd.c_str()); c2.fopen(0); BOOST_CHECK(c2.typeid_state() == typeid(ContextZstdSeekable)); BOOST_CHECK(c2.typeid_state() != typeid(ContextUncompressed)); @@ -73,9 +73,9 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__small_file) BOOST_CHECK_EQUAL(r_flat.tell(), 0); - written = r_flat.read(buffer, 1024); + written = r_flat.read((char*) &buffer[0], 1024); BOOST_CHECK_EQUAL(written, 403); - std_buffer = std::string(buffer, written); + std_buffer = std::string(reinterpret_cast(&buffer), written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference1), 0, "Difference in content"); flush_buffer(buffer, 
READ_BUFFER_SIZE + 1, '\0'); BOOST_CHECK_EQUAL(r_flat.tell(), 403); @@ -85,7 +85,7 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__small_file) BOOST_CHECK_EQUAL(c1.tell(), 0); written = c1.read(buffer, 1024); BOOST_CHECK_EQUAL(written, 403); - std_buffer = std::string(buffer, written); + std_buffer = std::string(reinterpret_cast(&buffer), written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference1), 0, "Difference in content"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); BOOST_CHECK_EQUAL(c1.tell(), 403); @@ -96,22 +96,16 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__small_file) // Context equivalent - compressed zstd { - printf("checkpoint 1\n"); - BOOST_CHECK_EQUAL(c2.tell(), 0); - printf("checkpoint 2\n"); written = c2.read(buffer, 1024); BOOST_CHECK_EQUAL(written, 403); - printf("checkpoint 3\n"); - std_buffer = std::string(buffer, written); + std_buffer = std::string(reinterpret_cast(&buffer), written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference1), 0, "Difference in content"); - printf("checkpoint 4\n"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); BOOST_CHECK_EQUAL(c2.tell(), 403); - printf("checkpoint 5\n"); BOOST_CHECK(c2.typeid_state() != typeid(ContextUncompressed)); BOOST_CHECK(c2.typeid_state() == typeid(ContextZstdSeekable)); @@ -119,12 +113,12 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__small_file) // test what happens when file is closed (twice) - written = r_flat.read(buffer, 1024); + written = r_flat.read((char*) &buffer[0], 1024); BOOST_CHECK_EQUAL(written, 0); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); BOOST_CHECK_EQUAL(r_flat.tell(), 403); - written = r_flat.read(buffer, 1024); + written = r_flat.read((char*) &buffer[0], 1024); BOOST_CHECK_EQUAL(written, 0); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); BOOST_CHECK_EQUAL(r_flat.tell(), 403); @@ -189,10 +183,10 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__small_file) r_flat.seek(0); - written = r_flat.read(buffer, 4); + written = r_flat.read((char*) 
&buffer[0], 4); BOOST_CHECK_EQUAL(written, 4); BOOST_CHECK_EQUAL(r_flat.tell(), 4); - std_buffer = std::string(buffer, written); + std_buffer = std::string(reinterpret_cast(&buffer), written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference2), 0, "Difference in content"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); @@ -204,7 +198,7 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__small_file) BOOST_CHECK_EQUAL(written, 4); BOOST_CHECK_EQUAL(c1.tell(), 4); - std_buffer = std::string(buffer, written); + std_buffer = std::string(reinterpret_cast(&buffer), written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference2), 0, "Difference in content"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); } @@ -218,7 +212,7 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__small_file) BOOST_CHECK_EQUAL(written, 4); BOOST_CHECK_EQUAL(c2.tell(), 4); - std_buffer = std::string(buffer, written); + std_buffer = std::string(reinterpret_cast(&buffer), written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference2), 0, "Difference in content"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); } @@ -227,10 +221,10 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__small_file) r_flat.seek(1); // reset to first pos in file BOOST_CHECK_EQUAL(r_flat.tell(), 1); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - written = r_flat.read(buffer, 4); + written = r_flat.read((char*) &buffer[0], 4); BOOST_CHECK_EQUAL(written, 4); BOOST_CHECK_EQUAL(r_flat.tell(), 5); - std_buffer = std::string(buffer, written); + std_buffer = std::string(reinterpret_cast(&buffer), written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference3), 0, "Difference in content"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); @@ -242,7 +236,7 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__small_file) written = c1.read(buffer, 4); BOOST_CHECK_EQUAL(written, 4); BOOST_CHECK_EQUAL(c1.tell(), 5); - std_buffer = std::string(buffer, written); + std_buffer = std::string(reinterpret_cast(&buffer), written); 
BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference3), 0, "Difference in content"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); } @@ -256,7 +250,7 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__small_file) written = c2.read(buffer, 4); BOOST_CHECK_EQUAL(written, 4); BOOST_CHECK_EQUAL(c2.tell(), 5); - std_buffer = std::string(buffer, written); + std_buffer = std::string(reinterpret_cast(&buffer), written); BOOST_CHECK_EQUAL(std_buffer.size(), reference3.size()); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference3), 0, "Difference in content"); if(std_buffer.compare(reference3) != 0) { @@ -286,16 +280,16 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__small_file) } { - chunked_reader r_zstd = chunked_reader(fastafs_file_zstd.c_str()); + chunked_reader_old r_zstd = chunked_reader_old(fastafs_file_zstd.c_str()); // Context equivalent - uncompressed - Context c1(fastafs_file.c_str()); + chunked_reader c1(fastafs_file.c_str()); c1.fopen(0); BOOST_CHECK(c1.typeid_state() == typeid(ContextUncompressed)); BOOST_CHECK(c1.typeid_state() != typeid(ContextZstdSeekable)); // Context equivalent - compressed - Context c2(fastafs_file_zstd.c_str()); + chunked_reader c2(fastafs_file_zstd.c_str()); c2.fopen(0); BOOST_CHECK(c2.typeid_state() == typeid(ContextZstdSeekable)); BOOST_CHECK(c2.typeid_state() != typeid(ContextUncompressed)); @@ -304,14 +298,14 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__small_file) written = r_zstd.read(buffer, 1024); BOOST_CHECK_EQUAL(written, 403); - std_buffer = std::string(buffer, written); + std_buffer = std::string(reinterpret_cast(&buffer), written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference1), 0, "Difference in content"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); { written = c1.read(buffer, 1024); BOOST_CHECK_EQUAL(written, 403); - std_buffer = std::string(buffer, written); + std_buffer = std::string(reinterpret_cast(&buffer), written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference1), 0, "Difference in 
content"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); } @@ -319,7 +313,7 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__small_file) { written = c2.read(buffer, 1024); BOOST_CHECK_EQUAL(written, 403); - std_buffer = std::string(buffer, written); + std_buffer = std::string(reinterpret_cast(&buffer), written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference1), 0, "Difference in content"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); } @@ -374,7 +368,7 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__small_file) written = r_zstd.read(buffer, 4); BOOST_CHECK_EQUAL(written, 4); BOOST_CHECK_EQUAL(r_zstd.tell(), 4); - std_buffer = std::string(buffer, written); + std_buffer = std::string(reinterpret_cast(&buffer), written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference2), 0, "Difference in content"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); @@ -382,7 +376,7 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__small_file) written = c1.read(buffer, 4); BOOST_CHECK_EQUAL(written, 4); BOOST_CHECK_EQUAL(r_zstd.tell(), 4); - std_buffer = std::string(buffer, written); + std_buffer = std::string(reinterpret_cast(&buffer), written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference2), 0, "Difference in content"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); } @@ -391,7 +385,7 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__small_file) written = c2.read(buffer, 4); BOOST_CHECK_EQUAL(written, 4); BOOST_CHECK_EQUAL(r_zstd.tell(), 4); - std_buffer = std::string(buffer, written); + std_buffer = std::string(reinterpret_cast(&buffer), written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference2), 0, "Difference in content"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); } @@ -415,7 +409,7 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__small_file) written = r_zstd.read(buffer, 4); BOOST_CHECK_EQUAL(written, 4); BOOST_CHECK_EQUAL(r_zstd.tell(), 5); - std_buffer = std::string(buffer, written); + std_buffer = std::string(reinterpret_cast(&buffer), written); 
BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference3), 0, "Difference in content"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); @@ -423,7 +417,7 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__small_file) written = c1.read(buffer, 4); BOOST_CHECK_EQUAL(written, 4); BOOST_CHECK_EQUAL(r_zstd.tell(), 5); - std_buffer = std::string(buffer, written); + std_buffer = std::string(reinterpret_cast(&buffer), written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference3), 0, "Difference in content"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); } @@ -432,7 +426,7 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__small_file) written = c2.read(buffer, 4); BOOST_CHECK_EQUAL(written, 4); BOOST_CHECK_EQUAL(r_zstd.tell(), 5); - std_buffer = std::string(buffer, written); + std_buffer = std::string(reinterpret_cast(&buffer), written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference3), 0, "Difference in content"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); } @@ -440,7 +434,7 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__small_file) } -BOOST_AUTO_TEST_CASE(test_chunked_reader__large_file) +BOOST_AUTO_TEST_CASE(test_chunked_reader_old__large_file) { // this file needs two buffers as its size is 1593 @@ -456,7 +450,7 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__large_file) (unsigned) ZSTD_SEEKABLE_FRAME_SIZE); - char buffer[READ_BUFFER_SIZE + 1]; + unsigned char buffer[READ_BUFFER_SIZE + 1]; flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); std::string std_buffer; size_t written; @@ -470,25 +464,25 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__large_file) { - chunked_reader r_flat = chunked_reader(fastafs_file.c_str()); + chunked_reader_old r_flat = chunked_reader_old(fastafs_file.c_str()); - written = r_flat.read(buffer, 1024); + written = r_flat.read((char*) &buffer[0], 1024); BOOST_CHECK_EQUAL(written, 1024); - std_buffer = std::string(buffer, written); + std_buffer = std::string(reinterpret_cast(&buffer), written); 
BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference1), 0, "Difference in content 1st read"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - written = r_flat.read(buffer, 1024); + written = r_flat.read((char*) &buffer[0], 1024); BOOST_CHECK_EQUAL(written, 569); - std_buffer = std::string(buffer, written); + std_buffer = std::string(reinterpret_cast(&buffer), written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference2), 0, "Difference in content 2nd read"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - written = r_flat.read(buffer, 1024); + written = r_flat.read((char*) &buffer[0], 1024); BOOST_CHECK_EQUAL(written, 0); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - written = r_flat.read(buffer, 1024); + written = r_flat.read((char*) &buffer[0], 1024); BOOST_CHECK_EQUAL(written, 0); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); @@ -496,9 +490,9 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__large_file) // set back r_flat.seek(1024); - written = r_flat.read(buffer, 1024); + written = r_flat.read((char*) &buffer[0], 1024); BOOST_CHECK_EQUAL(written, 569); - std_buffer = std::string(buffer, written); + std_buffer = std::string(reinterpret_cast(&buffer), written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference2), 0, "Difference in content 2nd read"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); @@ -506,46 +500,46 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__large_file) // set back r_flat.seek(4); - written = r_flat.read(buffer, 1024);// reads across two buffers? + written = r_flat.read((char*) &buffer[0], 1024);// reads across two buffers? BOOST_CHECK_EQUAL(written, 1024); - std_buffer = std::string(buffer, written); + std_buffer = std::string(reinterpret_cast(&buffer), written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference3), 0, "Difference in content 2nd read"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - written = r_flat.read(buffer, 1024);// reads across two buffers? 
+ written = r_flat.read((char*) &buffer[0], 1024);// reads across two buffers? BOOST_CHECK_EQUAL(written, 565); - std_buffer = std::string(buffer, written); + std_buffer = std::string(reinterpret_cast(&buffer), written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference4), 0, "Difference in content 2nd read"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); r_flat.seek(4); - written = r_flat.read(buffer, 4);// reads across two buffers? + written = r_flat.read((char*) &buffer[0], 4);// reads across two buffers? BOOST_CHECK_EQUAL(written, 4); - std_buffer = std::string(buffer, written); + std_buffer = std::string(reinterpret_cast(&buffer), written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference5), 0, "Difference in content 2nd read"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - written = r_flat.read(buffer, 1024);// reads across two buffers? + written = r_flat.read((char*) &buffer[0], 1024);// reads across two buffers? BOOST_CHECK_EQUAL(written, 1024); - std_buffer = std::string(buffer, written); + std_buffer = std::string(reinterpret_cast(&buffer), written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference6), 0, "Difference in content 2nd read"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); } { - chunked_reader r_zstd = chunked_reader(fastafs_file_zstd.c_str()); + chunked_reader_old r_zstd = chunked_reader_old(fastafs_file_zstd.c_str()); written = r_zstd.read(buffer, 1024); BOOST_CHECK_EQUAL(written, 1024); - std_buffer = std::string(buffer, written); + std_buffer = std::string(reinterpret_cast(&buffer), written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference1), 0, "Difference in content 1st read"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); written = r_zstd.read(buffer, 1024); BOOST_CHECK_EQUAL(written, 569); - std_buffer = std::string(buffer, written); + std_buffer = std::string(reinterpret_cast(&buffer), written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference2), 0, "Difference in content 2nd read"); 
flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); @@ -563,7 +557,7 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__large_file) written = r_zstd.read(buffer, 1024); BOOST_CHECK_EQUAL(written, 569); - std_buffer = std::string(buffer, written); + std_buffer = std::string(reinterpret_cast(&buffer), written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference2), 0, "Difference in content 2nd read"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); @@ -573,13 +567,13 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__large_file) written = r_zstd.read(buffer, 1024);// reads across two buffers? BOOST_CHECK_EQUAL(written, 1024); - std_buffer = std::string(buffer, written); + std_buffer = std::string(reinterpret_cast(&buffer), written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference3), 0, "Difference in content 2nd read"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); written = r_zstd.read(buffer, 1024);// reads across two buffers? BOOST_CHECK_EQUAL(written, 565); - std_buffer = std::string(buffer, written); + std_buffer = std::string(reinterpret_cast(&buffer), written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference4), 0, "Difference in content 2nd read"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); @@ -588,20 +582,20 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__large_file) written = r_zstd.read(buffer, 4);// reads across two buffers? BOOST_CHECK_EQUAL(written, 4); - std_buffer = std::string(buffer, written); + std_buffer = std::string(reinterpret_cast(&buffer), written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference5), 0, "Difference in content 2nd read"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); written = r_zstd.read(buffer, 1024);// reads across two buffers? 
BOOST_CHECK_EQUAL(written, 1024); - std_buffer = std::string(buffer, written); + std_buffer = std::string(reinterpret_cast(&buffer), written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference6), 0, "Difference in content 2nd read"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); } } -BOOST_AUTO_TEST_CASE(test_chunked_reader__new_style) +BOOST_AUTO_TEST_CASE(test_chunked_reader_old__new_style) { // this file needs two buffers as its size is 1593 @@ -617,7 +611,7 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__new_style) (unsigned) ZSTD_SEEKABLE_FRAME_SIZE); - char buffer[READ_BUFFER_SIZE + 1]; + unsigned char buffer[READ_BUFFER_SIZE + 1]; flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); std::string std_buffer; size_t written; @@ -631,22 +625,19 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__new_style) { - ////chunked_reader r_flat = chunked_reader(fastafs_file.c_str()); + ////chunked_reader_old r_flat = chunked_reader_old(fastafs_file.c_str()); - Context c = Context(fasta_file.c_str()); + chunked_reader c = chunked_reader(fasta_file.c_str()); c.fopen(0); // open file handle and load buffer written = c.read(buffer, 10); buffer[written] = '\0'; - printf("\n[%s]\n%i\n",buffer,written); written = c.read(buffer, 100); buffer[written] = '\0'; - printf("\n[%s]\n%i\n",buffer,written); written = c.read(buffer, 100); buffer[written] = '\0'; - printf("\n[%s]\n%i\n",buffer,written); } } diff --git a/test/fastafs/test_fastafs.cpp b/test/fastafs/test_fastafs.cpp index 3db8ed32..aded593d 100644 --- a/test/fastafs/test_fastafs.cpp +++ b/test/fastafs/test_fastafs.cpp @@ -52,6 +52,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs_seq_fastafile_size) // > c h r 1 \n t t t t c c c c a a a a g g g \n g \n BOOST_CHECK_EQUAL(fs.data[0]->fasta_filesize(15), 24); chunked_reader file = chunked_reader(fs.filename.c_str()); + file.fopen(0); ffs2f_init* cache_p40 = fs.init_ffs2f(40, true); ffs2f_init* cache_p23 = fs.init_ffs2f(23, true); @@ -102,6 +103,7 @@ 
BOOST_AUTO_TEST_CASE(test_fastafs_seq_fastafile_size_padding_0) // > c h r 1 \n T T T T C C C C A A A A G G G G \n BOOST_CHECK_EQUAL(fs.data[0]->fasta_filesize(fs.data[0]->n), 23); chunked_reader file = chunked_reader(fs.filename.c_str()); + file.fopen(0); ffs2f_init* cache_p0 = fs.init_ffs2f(0, true); // then: check returncodes: @@ -109,13 +111,14 @@ BOOST_AUTO_TEST_CASE(test_fastafs_seq_fastafile_size_padding_0) char chunk[1]; std::string ref = ">chr1\nttttccccaaaagggg\n"; - + for(uint32_t i = 0; i < ref.size(); i++) { ret = fs.data[0]->view_fasta_chunk(cache_p0->sequences[0], chunk, 1, i, file); BOOST_CHECK_EQUAL(chunk[0], ref[i]); // test for '>' BOOST_CHECK_EQUAL(ret, 1); } + // check if out of bound query returns 0 ret = fs.data[0]->view_fasta_chunk(cache_p0->sequences[0], chunk, 1, ref.size(), file); BOOST_CHECK_EQUAL(ret, 0); @@ -140,6 +143,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs_seq_fastafile_size_padding_0__no_masking) // > c h r 1 \n T T T T C C C C A A A A G G G G \n BOOST_CHECK_EQUAL(fs.data[0]->fasta_filesize(fs.data[0]->n), 23); chunked_reader file = chunked_reader(fs.filename.c_str()); + file.fopen(0); ffs2f_init* cache_p0 = fs.init_ffs2f(0, false); // no masking; everything must be uppercase @@ -174,6 +178,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs_seq_sha1) BOOST_REQUIRE(fs.data.size() > 0); chunked_reader file = chunked_reader(fs.filename.c_str()); + file.fopen(0); //fs.data[0]->sha1(cache_p0->sequences[0], &file); BOOST_CHECK_EQUAL(fs.data[0]->sha1(cache_p0->sequences[0], file), "2c0cae1d4e272b3ba63e7dd7e3c0efe62f2aaa2f"); @@ -194,6 +199,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs_seq_md5) BOOST_REQUIRE(fs.data.size() > 0); chunked_reader file = chunked_reader(fs.filename.c_str()); + file.fopen(0); BOOST_CHECK_EQUAL(fs.data[0]->md5(cache->sequences[0], file), "75255c6d90778999ad3643a2e69d4344"); BOOST_CHECK_EQUAL(fs.data[1]->md5(cache->sequences[1], file), "8b5673724a9965c29a1d76fe7031ac8a"); diff --git a/test/fastafs/test_ucsc2bit.cpp 
b/test/fastafs/test_ucsc2bit.cpp index 025df492..1309e2d0 100644 --- a/test/fastafs/test_ucsc2bit.cpp +++ b/test/fastafs/test_ucsc2bit.cpp @@ -404,8 +404,10 @@ BOOST_AUTO_TEST_CASE(test_fastafs_view_chunked_2bit_with_offset) fastafs fs = fastafs("test"); fs.load(fastafs_file); BOOST_REQUIRE(fs.data.size() > 0); + std::ifstream file(fs.filename.c_str(), std::ios::in | std::ios::binary | std::ios::ate); BOOST_REQUIRE(file.is_open()); + // check ucsc2bit header: char buffer[1024 + 1]; static std::string reference = UCSC2BIT_MAGIC + UCSC2BIT_VERSION + "\x07\x00\x00\x00"s "\x00\x00\x00\x00"s // literals bypass a char* conversion and preserve nullbytes @@ -459,14 +461,19 @@ BOOST_AUTO_TEST_CASE(test_fastafs_view_chunked_2bit_with_offset) "\x00\x00\x00\x00"s "\x09\x30" // NNAC TG?? = 00001001 00110000 ; + uint32_t complen; // voor lengte 1...(245-1) // voor i = 0, 245-lengte - for(complen = 1; complen < reference.size(); complen++) { - for(uint32_t file_offset = 0; file_offset < reference.size() - complen - 1; file_offset++) { + for(complen = 1; complen < reference.size(); complen++) + { + for(uint32_t file_offset = 0; file_offset < reference.size() - complen - 1; file_offset++) + { fs.view_ucsc2bit_chunk(buffer, complen, file_offset); BOOST_CHECK_EQUAL_MESSAGE(reference.compare(file_offset, complen, std_string_nullbyte_safe(buffer, 0, complen), 0, complen), 0, "Failed during len=" << complen << " and file offset=" << file_offset); } + + printf("\n"); } //for(uint32_t i = 0; i < complen; i++) { //printf("ref[%i]: %u\t == buf[%i]: %u",i + file_offset, (signed char) reference[i + file_offset], i, (signed char) buffer[i], (unsigned char) buffer[i]); diff --git a/test/flags/test_flags.cpp b/test/flags/test_flags.cpp index 51e9a2a8..81f5ff22 100644 --- a/test/flags/test_flags.cpp +++ b/test/flags/test_flags.cpp @@ -18,7 +18,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs_flags) { fastafs_flags f; - char buffer[2 + 1]; + unsigned char buffer[2 + 1]; buffer[2] = '\0'; // test: 00000000 
00000000 diff --git a/test/view/test_view.cpp b/test/view/test_view.cpp index e883eb1d..74bdd94c 100644 --- a/test/view/test_view.cpp +++ b/test/view/test_view.cpp @@ -146,7 +146,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs_twobit_offset_calc) fasta_to_fastafs("test/data/test.fa", fastafs_file, false); fastafs fs = fastafs("test"); fs.load(fastafs_file); - + for(uint32_t i = 0 ; i <= 7; i++) { in_N = fs.data[1]->get_n_offset(i, &num_Ns); BOOST_CHECK_EQUAL(num_Ns, 0); @@ -336,6 +336,7 @@ BOOST_AUTO_TEST_CASE(test_chunked_viewing) std::string full_file = ">chr1\nTTTT\nCCCC\nAAAA\nGGGG\n>chr2\nACTG\nACTG\nNNNN\nACTG\n>chr3.1\nACTG\nACTG\nAAAA\nC\n>chr3.2\nACTG\nACTG\nAAAA\nCC\n>chr3.3\nACTG\nACTG\nAAAA\nCCC\n>chr4\nACTG\nNNNN\n>chr5\nNNAC\nTG\n"; //std::string full_file = ">chr1 TTTT CCCC AAAA GGGG >chr2 ACTG ACTG NNNN ACTG >chr3.1 ACTG ACTG AAAA C >chr3.2 ACTG ACTG AAAA CC >chr3.3 ACTG ACTG AAAA CCC >chr4 ACTG NNNN >chr5 NNAC TG "; chunked_reader fhc = chunked_reader(fs.filename.c_str()); + fhc.fopen(0); for(uint32_t offset = 0; offset < 62; ++offset) { std::string substr_file = full_file.substr(offset, 100); @@ -380,6 +381,7 @@ BOOST_AUTO_TEST_CASE(test_chunked_viewing_sub) // test fastafs_seq functions //std::ifstream fh(fastafs_file.c_str(), std::ios::in | std::ios::binary | std::ios::ate); chunked_reader fh = chunked_reader(fastafs_file.c_str()); + fh.fopen(0); //BOOST_REQUIRE(fh.is_open()); // 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 @@ -521,6 +523,7 @@ BOOST_AUTO_TEST_CASE(test_chunked_viewing_fourbit) std::string full_file = ">IUPAC\nNBKA\nHMDC\nUWGS\nYVTR\nHGWV\nUMTB\nSDN-\n----\n----\n-BGY\nADNH\nSMUT\nRCKW\nVsbh\nvdnr\ntgyc\nmkwu\naAVT\nSDKN\nB---\nUGWM\nHYRC\n";// length = 117 chunked_reader fhc = chunked_reader(fs.filename.c_str()); + fhc.fopen(0); for(uint32_t offset = 0; offset < 62; ++offset) { std::string substr_file = full_file.substr(offset, 200); @@ -883,6 +886,7 @@ BOOST_AUTO_TEST_CASE(test_chunked_viewing2) ffs2f_init* cache = 
fs.init_ffs2f(60, true); chunked_reader fhc = chunked_reader(fs.filename.c_str()); + fhc.fopen(0); /* maak alle substrings: [....] From 351a7861569416c3c422e087af4ece7a61c74f35 Mon Sep 17 00:00:00 2001 From: youri Date: Thu, 15 Sep 2022 14:02:06 +0200 Subject: [PATCH 54/65] avoid testing debug-only features --- src/chunked_reader.cpp | 1 - test/sequenceregion/test_sequenceregion.cpp | 6 ++++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/chunked_reader.cpp b/src/chunked_reader.cpp index ba4b8ec1..8448f5cd 100644 --- a/src/chunked_reader.cpp +++ b/src/chunked_reader.cpp @@ -6,7 +6,6 @@ chunked_reader::chunked_reader(char * afilename) : fh_flat(nullptr), fh_zstd(nullptr), buffer_i(0), buffer_n(0), file_i(0) { - this->filename = realpath_cpp(afilename); this->init(); } diff --git a/test/sequenceregion/test_sequenceregion.cpp b/test/sequenceregion/test_sequenceregion.cpp index acf7033a..c46cb63b 100644 --- a/test/sequenceregion/test_sequenceregion.cpp +++ b/test/sequenceregion/test_sequenceregion.cpp @@ -94,7 +94,9 @@ BOOST_AUTO_TEST_CASE(test_sequence_region) sequence_region *sr = nullptr; if(sr == nullptr) {// compiler doesn't understand this otherwise +#if DEBUG BOOST_CHECK_THROW(sr = new sequence_region(&(arg[5])), std::invalid_argument); +#endif //DEBUG } } @@ -171,7 +173,9 @@ BOOST_AUTO_TEST_CASE(test_sequence_region3) if(sr == nullptr) {// compiler doesn't understand this otherwise char arg[] = "/seq/chrRr1:1235-1234"; +#if DEBUG BOOST_CHECK_THROW(sr = new sequence_region(&(arg[5])), std::invalid_argument); +#endif //DEBUG } } @@ -248,7 +252,9 @@ BOOST_AUTO_TEST_CASE(test_sequence_region2) sequence_region *sr = nullptr; if(sr == nullptr) {// compiler doesn't understand this otherwise +#if DEBUG BOOST_CHECK_THROW(sr = new sequence_region(&(arg[5])), std::invalid_argument); +#endif //DEBUG } } From a725e0ab5d7c39cbc5dbcf06d45f5e4c94ae6734 Mon Sep 17 00:00:00 2001 From: youri Date: Thu, 15 Sep 2022 14:12:52 +0200 Subject: [PATCH 55/65] typo 
resolved --- src/chunked_reader.cpp | 2 ++ test/chunked_reader/test_chunked_reader.cpp | 10 +++++----- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/chunked_reader.cpp b/src/chunked_reader.cpp index 8448f5cd..0e3aaeef 100644 --- a/src/chunked_reader.cpp +++ b/src/chunked_reader.cpp @@ -364,6 +364,8 @@ void Context::fopen(off_t file_offset) void Context::seek(off_t arg_offset) { + printf("context::seek()\n"); + this->file_i = arg_offset; // @todo obtain return value from this->state->seek() and limit this this->state->seek(arg_offset);// set file pointer this->cache_buffer();// update internal buffer diff --git a/test/chunked_reader/test_chunked_reader.cpp b/test/chunked_reader/test_chunked_reader.cpp index f50336c7..98cfeb2f 100644 --- a/test/chunked_reader/test_chunked_reader.cpp +++ b/test/chunked_reader/test_chunked_reader.cpp @@ -359,16 +359,16 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader__small_file) // test seek stuff r_zstd.seek(0); // reset to first pos in file - BOOST_CHECK_EQUAL(r_zstd.tell(), 0); + BOOST_REQUIRE_EQUAL(r_zstd.tell(), 0); { - r_zstd.seek(0); // reset to first pos in file - BOOST_CHECK_EQUAL(c1.tell(), 0); + c1.seek(0); // reset to first pos in file + BOOST_REQUIRE_EQUAL(c1.tell(), 0); } { - r_zstd.seek(0); // reset to first pos in file - BOOST_CHECK_EQUAL(c2.tell(), 0); + c2.seek(0); // reset to first pos in file + BOOST_REQUIRE_EQUAL(c2.tell(), 0); } written = r_zstd.read(buffer, 4); From 944b684cb885498555ad61e2183e188bda88c613 Mon Sep 17 00:00:00 2001 From: youri Date: Thu, 15 Sep 2022 14:15:23 +0200 Subject: [PATCH 56/65] meson fix --- meson.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meson.build b/meson.build index 615879a5..866f22bb 100644 --- a/meson.build +++ b/meson.build @@ -30,7 +30,7 @@ configuration_inc = include_directories('include') src = [ './dependencies/zstd-lib-common/xxhash.cpp', './dependencies/zstd-seekable-adapted/zstdseek_compress.cpp', 
'./dependencies/zstd-seekable-adapted/zstdseek_decompress.cpp', './dependencies/zstd-seekable-adapted/zstdseek_utils.cpp', -'./src/chunked_reader.cpp', './src/database.cpp', './src/fastafs.cpp', './src/fasta_to_fastafs.cpp', './src/fivebit_fivebytes.cpp', './src/flags.cpp', './src/fourbit_byte.cpp', './src/fuse.cpp', './src/sequence_region.cpp', './src/twobit_byte.cpp', './src/ucsc2bit.cpp', './src/ucsc2bit_to_fastafs.cpp', './src/utils.cpp', './src/lsfastafs.cpp', './src/main.cpp'] +'./src/chunked_reader.cpp', './src/database.cpp', './src/fastafs.cpp', './src/fasta_to_fastafs.cpp', './src/fivebit_fivebytes.cpp', './src/flags.cpp', './src/xbit_byte_encoder.cpp', './src/fourbit_byte.cpp', './src/fuse.cpp', './src/sequence_region.cpp', './src/twobit_byte.cpp', './src/ucsc2bit.cpp', './src/ucsc2bit_to_fastafs.cpp', './src/utils.cpp', './src/lsfastafs.cpp', './src/main.cpp'] incdir = include_directories('dependencies/zstd-seekable-adapted', './dependencies/zstd-lib-common', 'include') From 9eaed10f67e11eb28999c71161f3aa849f491a09 Mon Sep 17 00:00:00 2001 From: yhoogstrate Date: Wed, 21 Sep 2022 12:04:43 +0200 Subject: [PATCH 57/65] changes --- src/chunked_reader.cpp | 10 +++++----- src/fastafs.cpp | 10 ---------- 2 files changed, 5 insertions(+), 15 deletions(-) diff --git a/src/chunked_reader.cpp b/src/chunked_reader.cpp index 013bf49c..27d75b13 100644 --- a/src/chunked_reader.cpp +++ b/src/chunked_reader.cpp @@ -345,6 +345,8 @@ size_t chunked_reader::cache_buffer() this->buffer_i = 0; this->file_i += s; + + return s; } size_t chunked_reader::read(unsigned char *arg_buffer, size_t arg_buffer_size) @@ -389,8 +391,6 @@ void chunked_reader::fopen(off_t file_offset) void chunked_reader::seek(off_t arg_offset) { - printf("context::seek()\n"); - this->file_i = arg_offset; // @todo obtain return value from this->state->seek() and limit this this->state->seek(arg_offset);// set file pointer this->cache_buffer();// update internal buffer @@ -545,7 +545,8 @@ size_t 
ContextZstdSeekable::cache_buffer() this->context->get_buffer(), this->context->tell() + READ_BUFFER_SIZE //this->context->file_i + READ_BUFFER_SIZE ); - + + //printf("written = %i\n", written); //printf("{{%s}}\n", this->context->get_buffer()); @@ -593,7 +594,7 @@ void ContextZstdSeekable::fopen(off_t start_pos) } else { - fseek_orDie(this->fh->fin, 0, SEEK_SET);// set initial file handle to 0? + fseek_orDie(this->fh->fin, start_pos, SEEK_SET);// set initial file handle to 0? // this->fh->seekg(start_pos, std::ios::beg); size_t const initResult = ZSTD_seekable_initFile(this->seekable, fh->fin); @@ -606,7 +607,6 @@ void ContextZstdSeekable::fopen(off_t start_pos) void ContextZstdSeekable::seek(off_t arg_offset) { - printf("fseekordie: %i\n", arg_offset); fseek_orDie(fh->fin, arg_offset, SEEK_SET); } diff --git a/src/fastafs.cpp b/src/fastafs.cpp index e3ac4d08..0a696cf3 100644 --- a/src/fastafs.cpp +++ b/src/fastafs.cpp @@ -639,23 +639,13 @@ fastafs::~fastafs() void fastafs::load(std::string afilename) { - printf("aa\n"); std::streampos size; unsigned char *memblock; - printf("ab\n"); chunked_reader fh_in = chunked_reader(afilename.c_str()); - printf("ac\n"); - { - printf("ad\n"); fh_in.fopen(0); - printf("ae\n"); - this->filetype = fh_in.get_filetype(); - printf("af\n"); - - memblock = new unsigned char [20 + 1]; //sha1 is 20b // if a user can't compile this line, please replace it with C's From 9fdd964953b22b2f99065c44b67a011787fb0ab7 Mon Sep 17 00:00:00 2001 From: yhoogstrate Date: Sat, 21 Jan 2023 19:58:37 +0100 Subject: [PATCH 58/65] resolves an error --- .gitignore | 2 ++ src/chunked_reader.cpp | 19 +++++++------------ 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/.gitignore b/.gitignore index a4347fa2..4d9bd62d 100644 --- a/.gitignore +++ b/.gitignore @@ -35,3 +35,5 @@ analysis.txt *.naf .kdev4 compile_commands.json +*.fastafs +*.zst diff --git a/src/chunked_reader.cpp b/src/chunked_reader.cpp index 27d75b13..7cf46cf8 100644 --- 
a/src/chunked_reader.cpp +++ b/src/chunked_reader.cpp @@ -437,18 +437,17 @@ State *chunked_reader::find_state() void ContextUncompressed::fopen(off_t start_pos = 0) { - if(this->fh == nullptr) - { - throw std::runtime_error("[ContextUncompressed::fopen] empty fh?.\n"); - } if(this->fh != nullptr) { throw std::runtime_error("[ContextUncompressed::fopen] opening a non closed reader.\n"); } - + this->fh = new std::ifstream; this->fh->open(this->context->get_filename().c_str(), std::ios::in | std::ios::binary | std::ios::ate); - + if(this->fh == nullptr) + { + throw std::runtime_error("[ContextUncompressed::fopen] empty fh?\n"); + } if(this->fh->is_open()) // @todo move to top-level fopen() { @@ -515,14 +514,10 @@ ContextUncompressed::~ContextUncompressed() { if(this->fh != nullptr) { + this->fh->close(); if(!this->fh) { - this->fh->close(); - throw std::runtime_error("[ContextUncompressed::~ContextUncompressed] unexpected closed filehandle found.\n"); - } - else - { - this->fh->close(); + std::cerr << "[ContextUncompressed::~ContextUncompressed] unexpected closed filehandle found.\n"; } delete this->fh; From 18597da5cadcd0e0acf08a93c415605c0400cb43 Mon Sep 17 00:00:00 2001 From: yhoogstrate Date: Sat, 21 Jan 2023 20:11:25 +0100 Subject: [PATCH 59/65] more extensive .tell() tests --- test/chunked_reader/test_chunked_reader.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/test/chunked_reader/test_chunked_reader.cpp b/test/chunked_reader/test_chunked_reader.cpp index 16c55627..d5f567db 100644 --- a/test/chunked_reader/test_chunked_reader.cpp +++ b/test/chunked_reader/test_chunked_reader.cpp @@ -375,7 +375,7 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__small_file) { written = c1.read(buffer, 4); BOOST_CHECK_EQUAL(written, 4); - BOOST_CHECK_EQUAL(r_zstd.tell(), 4); + BOOST_CHECK_EQUAL(c1.tell(), 4); std_buffer = std::string(reinterpret_cast(&buffer), written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference2), 0, "Difference in 
content"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); @@ -384,7 +384,7 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__small_file) { written = c2.read(buffer, 4); BOOST_CHECK_EQUAL(written, 4); - BOOST_CHECK_EQUAL(r_zstd.tell(), 4); + BOOST_CHECK_EQUAL(c2.tell(), 4); std_buffer = std::string(reinterpret_cast(&buffer), written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference2), 0, "Difference in content"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); @@ -396,13 +396,13 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__small_file) { c1.seek(1); // reset to first pos in file - BOOST_CHECK_EQUAL(r_zstd.tell(), 1); + BOOST_CHECK_EQUAL(c1.tell(), 1); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); } { c2.seek(1); // reset to first pos in file - BOOST_CHECK_EQUAL(r_zstd.tell(), 1); + BOOST_CHECK_EQUAL(c2.tell(), 1); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); } @@ -416,7 +416,7 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__small_file) { written = c1.read(buffer, 4); BOOST_CHECK_EQUAL(written, 4); - BOOST_CHECK_EQUAL(r_zstd.tell(), 5); + BOOST_CHECK_EQUAL(c1.tell(), 5); std_buffer = std::string(reinterpret_cast(&buffer), written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference3), 0, "Difference in content"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); @@ -425,7 +425,7 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__small_file) { written = c2.read(buffer, 4); BOOST_CHECK_EQUAL(written, 4); - BOOST_CHECK_EQUAL(r_zstd.tell(), 5); + BOOST_CHECK_EQUAL(c2.tell(), 5); std_buffer = std::string(reinterpret_cast(&buffer), written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference3), 0, "Difference in content"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); From da4020cf7cf317e3fedc9daf7981e610a1e63456 Mon Sep 17 00:00:00 2001 From: yhoogstrate Date: Sat, 21 Jan 2023 20:16:19 +0100 Subject: [PATCH 60/65] extends testing new code --- test/chunked_reader/test_chunked_reader.cpp | 31 +++++++++++++++++++++ 1 file changed, 31 
insertions(+) diff --git a/test/chunked_reader/test_chunked_reader.cpp b/test/chunked_reader/test_chunked_reader.cpp index d5f567db..151888cf 100644 --- a/test/chunked_reader/test_chunked_reader.cpp +++ b/test/chunked_reader/test_chunked_reader.cpp @@ -465,12 +465,43 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__large_file) { chunked_reader_old r_flat = chunked_reader_old(fastafs_file.c_str()); + + // Context equivalent - uncompressed + chunked_reader c1(fastafs_file.c_str()); + c1.fopen(0); + BOOST_CHECK(c1.typeid_state() == typeid(ContextUncompressed)); + BOOST_CHECK(c1.typeid_state() != typeid(ContextZstdSeekable)); + + // Context equivalent - compressed + chunked_reader c2(fastafs_file_zstd.c_str()); + c2.fopen(0); + BOOST_CHECK(c2.typeid_state() == typeid(ContextZstdSeekable)); + BOOST_CHECK(c2.typeid_state() != typeid(ContextUncompressed)); + + written = r_flat.read((char*) &buffer[0], 1024); BOOST_CHECK_EQUAL(written, 1024); std_buffer = std::string(reinterpret_cast(&buffer), written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference1), 0, "Difference in content 1st read"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + + { // C1 + written = c1.read(buffer, 1024); + BOOST_CHECK_EQUAL(written, 1024); + std_buffer = std::string(reinterpret_cast(&buffer), written); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference1), 0, "Difference in content 1st read"); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } + + { // C2 + written = c2.read(buffer, 1024); + BOOST_CHECK_EQUAL(written, 1024); + std_buffer = std::string(reinterpret_cast(&buffer), written); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference1), 0, "Difference in content 1st read"); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } + written = r_flat.read((char*) &buffer[0], 1024); BOOST_CHECK_EQUAL(written, 569); From d78e2c41e1fcea4ec1646fcb6cf425a94d7b9687 Mon Sep 17 00:00:00 2001 From: yhoogstrate Date: Sat, 21 Jan 2023 20:23:25 +0100 Subject: [PATCH 61/65] more 
tests when building with meson --- meson.build | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/meson.build b/meson.build index 5231aba4..d694b46e 100644 --- a/meson.build +++ b/meson.build @@ -31,16 +31,30 @@ src = [ './dependencies/zstd-lib-common/xxhash.cpp', './dependencies/zstd-seekable-adapted/zstdseek_compress.cpp', './dependencies/zstd-seekable-adapted/zstdseek_decompress.cpp', './dependencies/zstd-seekable-adapted/zstdseek_utils.cpp', './src/chunked_reader.cpp', './src/database.cpp', './src/fastafs.cpp', './src/fasta_to_fastafs.cpp', './src/xbit_byte_encoder.cpp', './src/fivebit_fivebytes.cpp', './src/flags.cpp', './src/fourbit_byte.cpp', './src/fuse.cpp', './src/sequence_region.cpp', './src/twobit_byte.cpp', './src/ucsc2bit.cpp', './src/ucsc2bit_to_fastafs.cpp', './src/utils.cpp', './src/lsfastafs.cpp', - './src/main.cpp' - ] +'./src/main.cpp' +] + +src__test_check = [ +'./dependencies/zstd-lib-common/xxhash.cpp', +'./dependencies/zstd-seekable-adapted/zstdseek_compress.cpp', './dependencies/zstd-seekable-adapted/zstdseek_decompress.cpp', './dependencies/zstd-seekable-adapted/zstdseek_utils.cpp', +'./src/chunked_reader.cpp', './src/database.cpp', './src/fastafs.cpp', './src/fasta_to_fastafs.cpp', './src/xbit_byte_encoder.cpp', './src/fivebit_fivebytes.cpp', './src/flags.cpp', './src/fourbit_byte.cpp', './src/fuse.cpp', './src/sequence_region.cpp', './src/twobit_byte.cpp', './src/ucsc2bit.cpp', './src/ucsc2bit_to_fastafs.cpp', './src/utils.cpp', './src/lsfastafs.cpp', +'./test/check/test_check.cpp' +] -src2 = [ +src__test_cache = [ './dependencies/zstd-lib-common/xxhash.cpp', './dependencies/zstd-seekable-adapted/zstdseek_compress.cpp', './dependencies/zstd-seekable-adapted/zstdseek_decompress.cpp', './dependencies/zstd-seekable-adapted/zstdseek_utils.cpp', './src/chunked_reader.cpp', './src/database.cpp', './src/fastafs.cpp', './src/fasta_to_fastafs.cpp', './src/xbit_byte_encoder.cpp', 
'./src/fivebit_fivebytes.cpp', './src/flags.cpp', './src/fourbit_byte.cpp', './src/fuse.cpp', './src/sequence_region.cpp', './src/twobit_byte.cpp', './src/ucsc2bit.cpp', './src/ucsc2bit_to_fastafs.cpp', './src/utils.cpp', './src/lsfastafs.cpp', './test/cache/test_cache.cpp' ] +src__test_chunked_reader = [ +'./dependencies/zstd-lib-common/xxhash.cpp', +'./dependencies/zstd-seekable-adapted/zstdseek_compress.cpp', './dependencies/zstd-seekable-adapted/zstdseek_decompress.cpp', './dependencies/zstd-seekable-adapted/zstdseek_utils.cpp', +'./src/chunked_reader.cpp', './src/database.cpp', './src/fastafs.cpp', './src/fasta_to_fastafs.cpp', './src/xbit_byte_encoder.cpp', './src/fivebit_fivebytes.cpp', './src/flags.cpp', './src/fourbit_byte.cpp', './src/fuse.cpp', './src/sequence_region.cpp', './src/twobit_byte.cpp', './src/ucsc2bit.cpp', './src/ucsc2bit_to_fastafs.cpp', './src/utils.cpp', './src/lsfastafs.cpp', + './test/chunked_reader/test_chunked_reader.cpp' + ] + incdir = include_directories('dependencies/zstd-seekable-adapted', './dependencies/zstd-lib-common', 'include') @@ -55,7 +69,15 @@ executable('fastafs', src, include_directories : incdir, dependencies: [crypto, openssl, fuse, zlib, zstd]) -executable('test_cache_meson', src2, +executable('test_cache', src__test_cache, + include_directories : incdir, + dependencies: [crypto, openssl, fuse, zlib, zstd]) + +executable('test_check', src__test_check, + include_directories : incdir, + dependencies: [crypto, openssl, fuse, zlib, zstd]) + +executable('test_chunked_reader', src__test_chunked_reader, include_directories : incdir, dependencies: [crypto, openssl, fuse, zlib, zstd]) From 2b8a596166d4fddd9a008bc6762fb059f09dabc0 Mon Sep 17 00:00:00 2001 From: yhoogstrate Date: Sat, 21 Jan 2023 20:27:00 +0100 Subject: [PATCH 62/65] more extended tests --- test/chunked_reader/test_chunked_reader.cpp | 28 +++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/test/chunked_reader/test_chunked_reader.cpp 
b/test/chunked_reader/test_chunked_reader.cpp index 151888cf..4faf6582 100644 --- a/test/chunked_reader/test_chunked_reader.cpp +++ b/test/chunked_reader/test_chunked_reader.cpp @@ -508,10 +508,38 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__large_file) std_buffer = std::string(reinterpret_cast(&buffer), written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference2), 0, "Difference in content 2nd read"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + + { // C1 + written = c1.read(buffer, 1024); + BOOST_CHECK_EQUAL(written, 569); + std_buffer = std::string(reinterpret_cast(&buffer), written); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference2), 0, "Difference in content 2nd read"); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } + + { // C2 + written = c2.read(buffer, 1024); + BOOST_CHECK_EQUAL(written, 569); + std_buffer = std::string(reinterpret_cast(&buffer), written); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference2), 0, "Difference in content 2nd read"); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } written = r_flat.read((char*) &buffer[0], 1024); BOOST_CHECK_EQUAL(written, 0); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + + { // C1 + written = c1.read(buffer, 1024); + BOOST_CHECK_EQUAL(written, 0); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } + + { // C2 + written = c2.read(buffer, 1024); + BOOST_CHECK_EQUAL(written, 0); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } written = r_flat.read((char*) &buffer[0], 1024); BOOST_CHECK_EQUAL(written, 0); From 15065e5a9ad5cd6c4528ec8911bfeef1b71c1c08 Mon Sep 17 00:00:00 2001 From: yhoogstrate Date: Sun, 22 Jan 2023 14:59:19 +0100 Subject: [PATCH 63/65] updates --- include/chunked_reader.hpp | 13 +- src/chunked_reader.cpp | 135 ++++----- src/fastafs.cpp | 2 +- src/ucsc2bit_to_fastafs.cpp | 2 +- src/xbit_byte_encoder.cpp | 2 +- test/chunked_reader/test_chunked_reader.cpp | 291 +++++++++++++------- test/fastafs/test_fastafs.cpp | 2 +- 
test/fastafs/test_ucsc2bit.cpp | 6 +- test/view/test_view.cpp | 2 +- 9 files changed, 254 insertions(+), 201 deletions(-) diff --git a/include/chunked_reader.hpp b/include/chunked_reader.hpp index 1c322dc7..d47b7533 100644 --- a/include/chunked_reader.hpp +++ b/include/chunked_reader.hpp @@ -38,9 +38,9 @@ class chunked_reader_old // dit is Context ZSTD_seekable_decompress_init_data* fh_zstd; std::string filename; // try doing this with inode - + compression_type filetype; - + char buffer[READ_BUFFER_SIZE + 1]; size_t buffer_i; size_t buffer_n; @@ -62,7 +62,10 @@ class chunked_reader_old // dit is Context void find_filetype(); void set_filetype(compression_type); - compression_type get_filetype() { return this->filetype ; }; + compression_type get_filetype() + { + return this->filetype ; + }; size_t read(char *, size_t);// @deprecate size_t read(unsigned char *, size_t); @@ -113,7 +116,7 @@ class ContextZstdSeekable : public State { private: ZSTD_seekable_decompress_init_data* fh = nullptr; - + size_t const buffOutSize = ZSTD_DStreamOutSize(); char* const buffOut = (char*) malloc_orDie(buffOutSize); ZSTD_seekable* const seekable = ZSTD_seekable_create(); //@todo -> in constructor, check if not NULL @@ -147,7 +150,7 @@ class chunked_reader // master chunked_reader void TransitionTo(State *); // @todo rename to set_compression_type chunked_reader(const char *) ; ~chunked_reader(); - + State* find_state(); const std::type_info& typeid_state(); diff --git a/src/chunked_reader.cpp b/src/chunked_reader.cpp index 7cf46cf8..6d138c79 100644 --- a/src/chunked_reader.cpp +++ b/src/chunked_reader.cpp @@ -259,30 +259,26 @@ void State::set_context(chunked_reader *arg_context) // This does not read the actual flat file, this copies its internal buffer to arg_buffer_to size_t State::read(unsigned char *arg_buffer_to, size_t arg_buffer_to_size, - size_t &buffer_i, size_t &buffer_n) + size_t &buffer_i, size_t &buffer_n) { #if DEBUG - if(arg_buffer_to_size > READ_BUFFER_SIZE) - { + 
if(arg_buffer_to_size > READ_BUFFER_SIZE) { throw std::runtime_error("[ContextUncompressed::read] Requested buffer size larger than internal context buffer.\n"); } #endif //DEBUG size_t written = 0; const size_t n1 = std::min(buffer_n - buffer_i, arg_buffer_to_size);// number of characters to copy - + // copy current internal buffer completely - while(written < n1) - { + while(written < n1) { arg_buffer_to[written++] = this->context->get_buffer()[buffer_i++]; } - if(written < arg_buffer_to_size) - { + if(written < arg_buffer_to_size) { this->context->cache_buffer();// needs to set n to 0 - while(buffer_i < buffer_n and written < arg_buffer_to_size) - { + while(buffer_i < buffer_n and written < arg_buffer_to_size) { arg_buffer_to[written++] = this->context->get_buffer()[buffer_i++]; } } @@ -321,16 +317,11 @@ char * chunked_reader::get_buffer() //@todo remove and use typeid only compression_type chunked_reader::get_filetype() { - if(this->typeid_state() == typeid(ContextUncompressed)) - { + if(this->typeid_state() == typeid(ContextUncompressed)) { return compression_type::uncompressed; - } - else if(this->typeid_state() == typeid(ContextZstdSeekable)) - { + } else if(this->typeid_state() == typeid(ContextZstdSeekable)) { return compression_type::zstd; - } - else - { + } else { return compression_type::undefined; } } @@ -345,7 +336,7 @@ size_t chunked_reader::cache_buffer() this->buffer_i = 0; this->file_i += s; - + return s; } @@ -353,28 +344,26 @@ size_t chunked_reader::read(unsigned char *arg_buffer, size_t arg_buffer_size) { //arg_buffer_size = std::min(arg_buffer_size, (size_t) READ_BUFFER_SIZE); #if DEBUG - - if(arg_buffer == nullptr) - { + + if(arg_buffer == nullptr) { throw std::runtime_error("[chunked_reader::read] Invalid / not allocated buffer.\n"); } - if(arg_buffer_size > READ_BUFFER_SIZE) - { + if(arg_buffer_size > READ_BUFFER_SIZE) { throw std::runtime_error("[chunked_reader::read] Requested buffer size larger than internal context buffer.\n"); } - + 
#endif //DEBUG - + return this->state->read(arg_buffer, arg_buffer_size, this->buffer_i, this->buffer_n); } -void chunked_reader::TransitionTo(State *arg_state) { +void chunked_reader::TransitionTo(State *arg_state) +{ - if(this->state != nullptr) - { + if(this->state != nullptr) { delete this->state; // delete and destruct previous state, incl file points, should also run fh.close(); etc. } @@ -401,11 +390,11 @@ void chunked_reader::seek(off_t arg_offset) size_t chunked_reader::tell() { //printf("Context :: tell: %i - %i + %i = %i\n", - //this->file_i , - //this->buffer_n , + //this->file_i , + //this->buffer_n , //this->buffer_i , - //this->file_i - this->buffer_n + this->buffer_i); - + //this->file_i - this->buffer_n + this->buffer_i); + return this->file_i - this->buffer_n + this->buffer_i; } @@ -424,12 +413,9 @@ const std::type_info& chunked_reader::typeid_state() State *chunked_reader::find_state() { - if(is_zstd_file(this->filename.c_str())) - { + if(is_zstd_file(this->filename.c_str())) { return new ContextZstdSeekable; - } - else - { + } else { return new ContextUncompressed; } } @@ -437,24 +423,19 @@ State *chunked_reader::find_state() void ContextUncompressed::fopen(off_t start_pos = 0) { - if(this->fh != nullptr) - { + if(this->fh != nullptr) { throw std::runtime_error("[ContextUncompressed::fopen] opening a non closed reader.\n"); } - + this->fh = new std::ifstream; this->fh->open(this->context->get_filename().c_str(), std::ios::in | std::ios::binary | std::ios::ate); - if(this->fh == nullptr) - { + if(this->fh == nullptr) { throw std::runtime_error("[ContextUncompressed::fopen] empty fh?\n"); } - if(this->fh->is_open()) // @todo move to top-level fopen() - { + if(this->fh->is_open()) { // @todo move to top-level fopen() this->seek(start_pos); - } - else - { + } else { throw std::runtime_error("[chunked_reader_old::init] Cannot open file for reading.\n"); } } @@ -462,13 +443,11 @@ void ContextUncompressed::fopen(off_t start_pos = 0) size_t 
ContextUncompressed::cache_buffer() { #if DEBUG - if(this->fh->tellg() == -1) - { + if(this->fh->tellg() == -1) { throw std::runtime_error("ContextUncompressed::cache_buffer\n"); } - - if(this->context->get_buffer() == nullptr) - { + + if(this->context->get_buffer() == nullptr) { throw std::runtime_error("ContextUncompressed::cache_buffer - no valid buffer?\n"); } #endif //DEBUG @@ -492,7 +471,7 @@ size_t ContextUncompressed::cache_buffer() this->fh->clear(); this->fh->seekg(0, std::ios::end); } - + return s; } @@ -500,8 +479,7 @@ size_t ContextUncompressed::cache_buffer() void ContextUncompressed::seek(off_t arg_offset) { - if(!this->fh->is_open()) - { + if(!this->fh->is_open()) { throw std::runtime_error("[ContextUncompressed::seek] unexpected closed filehandle found.\n"); } @@ -512,11 +490,9 @@ void ContextUncompressed::seek(off_t arg_offset) ContextUncompressed::~ContextUncompressed() { - if(this->fh != nullptr) - { + if(this->fh != nullptr) { this->fh->close(); - if(!this->fh) - { + if(!this->fh) { std::cerr << "[ContextUncompressed::~ContextUncompressed] unexpected closed filehandle found.\n"; } @@ -531,20 +507,20 @@ size_t ContextZstdSeekable::cache_buffer() { //size_t written = ZSTD_seekable_decompressFile_orDie(this->fh_zstd, this->file_i, this->buffer, this->file_i + READ_BUFFER_SIZE); //this->fh->read(this->context->get_buffer(), READ_BUFFER_SIZE); - + // figure out the location in the decompressed file - + size_t written = ZSTD_seekable_decompressFile_orDie( - this->fh, - this->context->get_file_i(), //this->context->file_i, - this->context->get_buffer(), - this->context->tell() + READ_BUFFER_SIZE //this->context->file_i + READ_BUFFER_SIZE - ); + this->fh, + this->context->get_file_i(), //this->context->file_i, + this->context->get_buffer(), + this->context->tell() + READ_BUFFER_SIZE //this->context->file_i + READ_BUFFER_SIZE + ); //printf("written = %i\n", written); //printf("{{%s}}\n", this->context->get_buffer()); - + /* { #if DEBUG @@ -568,32 
+544,31 @@ size_t ContextZstdSeekable::cache_buffer() */ //throw std::runtime_error("[ContextZstdSeekable::cache_buffer] not implemented.\n"); - + return written; } void ContextZstdSeekable::fopen(off_t start_pos) { - if(this->fh != nullptr) - { + if(this->fh != nullptr) { throw std::runtime_error("[ContextZstdSeekable::fopen] opening a non closed reader.\n"); } - - + + this->fh = ZSTD_seekable_decompressFile_init(this->context->get_filename().c_str()); - if((this->fh->fin == NULL) | feof(this->fh->fin)) - { + if((this->fh->fin == NULL) | feof(this->fh->fin)) { throw std::runtime_error("[ContextZstdSeekable::fopen] not implemented.\n"); - } - else - { + } else { fseek_orDie(this->fh->fin, start_pos, SEEK_SET);// set initial file handle to 0? // this->fh->seekg(start_pos, std::ios::beg); size_t const initResult = ZSTD_seekable_initFile(this->seekable, fh->fin); - if (ZSTD_isError(initResult)) { fprintf(stderr, "ZSTD_seekable_init() error : %s \n", ZSTD_getErrorName(initResult)); exit(11); } + if(ZSTD_isError(initResult)) { + fprintf(stderr, "ZSTD_seekable_init() error : %s \n", ZSTD_getErrorName(initResult)); + exit(11); + } //@todo class member? 
this->maxFileSize = ZSTD_seekable_getFileDecompressedSize(this->seekable); @@ -616,6 +591,6 @@ ContextZstdSeekable::~ContextZstdSeekable() delete this->fh; } - + //throw std::runtime_error("[ContextUncompressed::~ContextUncompressed] not implemented.\n"); } diff --git a/src/fastafs.cpp b/src/fastafs.cpp index 0a696cf3..20485bbd 100644 --- a/src/fastafs.cpp +++ b/src/fastafs.cpp @@ -641,7 +641,7 @@ void fastafs::load(std::string afilename) { std::streampos size; unsigned char *memblock; - + chunked_reader fh_in = chunked_reader(afilename.c_str()); { fh_in.fopen(0); diff --git a/src/ucsc2bit_to_fastafs.cpp b/src/ucsc2bit_to_fastafs.cpp index a108da11..60c7c618 100644 --- a/src/ucsc2bit_to_fastafs.cpp +++ b/src/ucsc2bit_to_fastafs.cpp @@ -51,7 +51,7 @@ size_t ucsc2bit_to_fastafs(std::string ucsc2bit_file, std::string fastafs_file) fh_fastafs << "\x00\x00\x00\x00"s;// position of metedata ~ unknown YET // Read UCSC2bit header (n seq) - fh_ucsc2bit.read( (char*)( &buffer[0]) , 12);//conversion from unsigned char* to char* (https://stackoverflow.com/questions/604431/c-reading-unsigned-char-from-file-stream) + fh_ucsc2bit.read((char*)(&buffer[0]), 12); //conversion from unsigned char* to char* (https://stackoverflow.com/questions/604431/c-reading-unsigned-char-from-file-stream) n = fourbytes_to_uint_ucsc2bit(buffer, 8); uint_to_fourbytes(buffer, n); std::vector data(n); diff --git a/src/xbit_byte_encoder.cpp b/src/xbit_byte_encoder.cpp index 29384592..f1a2c825 100644 --- a/src/xbit_byte_encoder.cpp +++ b/src/xbit_byte_encoder.cpp @@ -12,7 +12,7 @@ void xbit_byte_encoder::next(chunked_reader &r) unsigned char *buf = new unsigned char[2]; r.read(buf, 1); this->data = buf[0]; - + delete[] buf; } diff --git a/test/chunked_reader/test_chunked_reader.cpp b/test/chunked_reader/test_chunked_reader.cpp index 4faf6582..dfcecc99 100644 --- a/test/chunked_reader/test_chunked_reader.cpp +++ b/test/chunked_reader/test_chunked_reader.cpp @@ -57,13 +57,13 @@ 
BOOST_AUTO_TEST_CASE(test_chunked_reader_old__small_file) { // old init chunked_reader_old r_flat = chunked_reader_old(fastafs_file.c_str()); - + // Context equivalent - uncompressed chunked_reader c1(fastafs_file.c_str()); c1.fopen(0); BOOST_CHECK(c1.typeid_state() == typeid(ContextUncompressed)); BOOST_CHECK(c1.typeid_state() != typeid(ContextZstdSeekable)); - + // Context equivalent - compressed chunked_reader c2(fastafs_file_zstd.c_str()); c2.fopen(0); @@ -93,7 +93,7 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__small_file) BOOST_CHECK(c1.typeid_state() == typeid(ContextUncompressed)); BOOST_CHECK(c1.typeid_state() != typeid(ContextZstdSeekable)); } - + // Context equivalent - compressed zstd { BOOST_CHECK_EQUAL(c2.tell(), 0); @@ -135,7 +135,7 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__small_file) flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); BOOST_CHECK_EQUAL(c1.tell(), 403); } - + // Context equivalent - compressed zstd { written = c2.read(buffer, 1024); @@ -254,11 +254,11 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__small_file) BOOST_CHECK_EQUAL(std_buffer.size(), reference3.size()); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference3), 0, "Difference in content"); if(std_buffer.compare(reference3) != 0) { - printf("%i != %i\n",reference3.size(), std_buffer.size()); - printf("%s != %s\n",reference3, std_buffer); - - printf("[%u][%u][%u][%u]\n",(unsigned char) reference3[0],reference3[1],reference3[2],reference3[3]); - printf("[%u][%u][%u][%u]\n",(unsigned char) buffer[0],buffer[1],buffer[2],buffer[3]); + printf("%u != %u\n", (unsigned int) reference3.size(), (unsigned int) std_buffer.size()); + printf("%s != %s\n", reference3.c_str(), std_buffer.c_str()); + + printf("[%u][%u][%u][%u]\n", (unsigned char) reference3[0], reference3[1], reference3[2], reference3[3]); + printf("[%u][%u][%u][%u]\n", (unsigned char) buffer[0], buffer[1], buffer[2], buffer[3]); //printf("[%c][%c][%c][%c]\n"); } flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); 
@@ -266,28 +266,28 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__small_file) //@todo should trigger error!? - r_flat.seek(1024*1024); // trigger out of bound + r_flat.seek(1024 * 1024); // trigger out of bound // Context equivalent - uncompressed { - c1.seek(1024*1024); + c1.seek(1024 * 1024); } // Context equivalent - compressed zstd { - c2.seek(1024*1024); + c2.seek(1024 * 1024); } } { chunked_reader_old r_zstd = chunked_reader_old(fastafs_file_zstd.c_str()); - + // Context equivalent - uncompressed chunked_reader c1(fastafs_file.c_str()); c1.fopen(0); BOOST_CHECK(c1.typeid_state() == typeid(ContextUncompressed)); BOOST_CHECK(c1.typeid_state() != typeid(ContextZstdSeekable)); - + // Context equivalent - compressed chunked_reader c2(fastafs_file_zstd.c_str()); c2.fopen(0); @@ -301,7 +301,7 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__small_file) std_buffer = std::string(reinterpret_cast(&buffer), written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference1), 0, "Difference in content"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - + { written = c1.read(buffer, 1024); BOOST_CHECK_EQUAL(written, 403); @@ -354,12 +354,12 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__small_file) // test seek stuff r_zstd.seek(0); // reset to first pos in file BOOST_REQUIRE_EQUAL(r_zstd.tell(), 0); - + { c1.seek(0); // reset to first pos in file BOOST_REQUIRE_EQUAL(c1.tell(), 0); } - + { c2.seek(0); // reset to first pos in file BOOST_REQUIRE_EQUAL(c2.tell(), 0); @@ -412,7 +412,7 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__small_file) std_buffer = std::string(reinterpret_cast(&buffer), written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference3), 0, "Difference in content"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - + { written = c1.read(buffer, 4); BOOST_CHECK_EQUAL(written, 4); @@ -421,7 +421,7 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__small_file) BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference3), 0, "Difference in content"); 
flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); } - + { written = c2.read(buffer, 4); BOOST_CHECK_EQUAL(written, 4); @@ -465,13 +465,13 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__large_file) { chunked_reader_old r_flat = chunked_reader_old(fastafs_file.c_str()); - + // Context equivalent - uncompressed chunked_reader c1(fastafs_file.c_str()); c1.fopen(0); BOOST_CHECK(c1.typeid_state() == typeid(ContextUncompressed)); BOOST_CHECK(c1.typeid_state() != typeid(ContextZstdSeekable)); - + // Context equivalent - compressed chunked_reader c2(fastafs_file_zstd.c_str()); c2.fopen(0); @@ -485,16 +485,18 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__large_file) std_buffer = std::string(reinterpret_cast(&buffer), written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference1), 0, "Difference in content 1st read"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - - { // C1 + + { + // C1 written = c1.read(buffer, 1024); BOOST_CHECK_EQUAL(written, 1024); std_buffer = std::string(reinterpret_cast(&buffer), written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference1), 0, "Difference in content 1st read"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); } - - { // C2 + + { + // C2 written = c2.read(buffer, 1024); BOOST_CHECK_EQUAL(written, 1024); std_buffer = std::string(reinterpret_cast(&buffer), written); @@ -508,16 +510,18 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__large_file) std_buffer = std::string(reinterpret_cast(&buffer), written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference2), 0, "Difference in content 2nd read"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - - { // C1 + + { + // C1 written = c1.read(buffer, 1024); BOOST_CHECK_EQUAL(written, 569); std_buffer = std::string(reinterpret_cast(&buffer), written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference2), 0, "Difference in content 2nd read"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); } - - { // C2 + + { + // C2 written = c2.read(buffer, 1024); 
BOOST_CHECK_EQUAL(written, 569); std_buffer = std::string(reinterpret_cast(&buffer), written); @@ -528,14 +532,16 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__large_file) written = r_flat.read((char*) &buffer[0], 1024); BOOST_CHECK_EQUAL(written, 0); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - - { // C1 + + { + // C1 written = c1.read(buffer, 1024); BOOST_CHECK_EQUAL(written, 0); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); } - - { // C2 + + { + // C2 written = c2.read(buffer, 1024); BOOST_CHECK_EQUAL(written, 0); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); @@ -545,111 +551,170 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__large_file) BOOST_CHECK_EQUAL(written, 0); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + { + // C1 + written = c1.read(buffer, 1024); + BOOST_CHECK_EQUAL(written, 0); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } + + { + // C2 + written = c2.read(buffer, 1024); + BOOST_CHECK_EQUAL(written, 0); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } + // set back r_flat.seek(1024); + { + // C1 + c1.seek(1024); + } + + { + // C2 + c2.seek(1024); + } + + written = r_flat.read((char*) &buffer[0], 1024); BOOST_CHECK_EQUAL(written, 569); std_buffer = std::string(reinterpret_cast(&buffer), written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference2), 0, "Difference in content 2nd read"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + { // C1 + written = c1.read(buffer, 1024); + BOOST_CHECK_EQUAL(written, 569); + std_buffer = std::string(reinterpret_cast(&buffer), written); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference2), 0, "Difference in content 2nd read"); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } + + { // C2 + written = c2.read(buffer, 1024); + BOOST_CHECK_EQUAL(written, 569); + std_buffer = std::string(reinterpret_cast(&buffer), written); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference2), 0, "Difference in content 2nd read"); + flush_buffer(buffer, 
READ_BUFFER_SIZE + 1, '\0'); + } + // set back r_flat.seek(4); + { // C1 + c1.seek(4); + } + + { // C2 + c2.seek(4); + } + written = r_flat.read((char*) &buffer[0], 1024);// reads across two buffers? BOOST_CHECK_EQUAL(written, 1024); std_buffer = std::string(reinterpret_cast(&buffer), written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference3), 0, "Difference in content 2nd read"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - written = r_flat.read((char*) &buffer[0], 1024);// reads across two buffers? - BOOST_CHECK_EQUAL(written, 565); - std_buffer = std::string(reinterpret_cast(&buffer), written); - BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference4), 0, "Difference in content 2nd read"); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + { // C1 + written = c1.read(buffer, 1024);// reads across two buffers? + BOOST_CHECK_EQUAL(written, 1024); + std_buffer = std::string(reinterpret_cast(&buffer), written); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference3), 0, "Difference in content 2nd read"); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } + { // C2 + written = c2.read(buffer, 1024);// reads across two buffers? + BOOST_CHECK_EQUAL(written, 1024); + std_buffer = std::string(reinterpret_cast(&buffer), written); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference3), 0, "Difference in content 2nd read"); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } - r_flat.seek(4); - - written = r_flat.read((char*) &buffer[0], 4);// reads across two buffers? - BOOST_CHECK_EQUAL(written, 4); - std_buffer = std::string(reinterpret_cast(&buffer), written); - BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference5), 0, "Difference in content 2nd read"); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); written = r_flat.read((char*) &buffer[0], 1024);// reads across two buffers? 
- BOOST_CHECK_EQUAL(written, 1024); - std_buffer = std::string(reinterpret_cast(&buffer), written); - BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference6), 0, "Difference in content 2nd read"); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - } - - { - chunked_reader_old r_zstd = chunked_reader_old(fastafs_file_zstd.c_str()); - - written = r_zstd.read(buffer, 1024); - BOOST_CHECK_EQUAL(written, 1024); - std_buffer = std::string(reinterpret_cast(&buffer), written); - BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference1), 0, "Difference in content 1st read"); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - - written = r_zstd.read(buffer, 1024); - BOOST_CHECK_EQUAL(written, 569); + BOOST_CHECK_EQUAL(written, 565); std_buffer = std::string(reinterpret_cast(&buffer), written); - BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference2), 0, "Difference in content 2nd read"); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - - written = r_zstd.read(buffer, 1024); - BOOST_CHECK_EQUAL(written, 0); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference4), 0, "Difference in content 2nd read"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - written = r_zstd.read(buffer, 1024); - BOOST_CHECK_EQUAL(written, 0); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + { // C1 + written = c1.read(buffer, 1024);// reads across two buffers? + BOOST_CHECK_EQUAL(written, 565); + std_buffer = std::string(reinterpret_cast(&buffer), written); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference4), 0, "Difference in content 2nd read"); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } + { // C2 + written = c2.read(buffer, 1024);// reads across two buffers? 
+ BOOST_CHECK_EQUAL(written, 565); + std_buffer = std::string(reinterpret_cast(&buffer), written); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference4), 0, "Difference in content 2nd read"); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } - // set back - r_zstd.seek(1024); - written = r_zstd.read(buffer, 1024); - BOOST_CHECK_EQUAL(written, 569); - std_buffer = std::string(reinterpret_cast(&buffer), written); - BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference2), 0, "Difference in content 2nd read"); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + r_flat.seek(4); + { // C1 + c1.seek(4); + } - // set back - r_zstd.seek(4); + { // C2 + c2.seek(4); + } - written = r_zstd.read(buffer, 1024);// reads across two buffers? - BOOST_CHECK_EQUAL(written, 1024); - std_buffer = std::string(reinterpret_cast(&buffer), written); - BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference3), 0, "Difference in content 2nd read"); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - written = r_zstd.read(buffer, 1024);// reads across two buffers? - BOOST_CHECK_EQUAL(written, 565); + written = r_flat.read((char*) &buffer[0], 4);// reads across two buffers? + BOOST_CHECK_EQUAL(written, 4); std_buffer = std::string(reinterpret_cast(&buffer), written); - BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference4), 0, "Difference in content 2nd read"); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference5), 0, "Difference in content 2nd read"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + { // C1 + written = c1.read(buffer, 4);// reads across two buffers? + BOOST_CHECK_EQUAL(written, 4); + std_buffer = std::string(reinterpret_cast(&buffer), written); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference5), 0, "Difference in content 2nd read"); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } - r_zstd.seek(4); + { // C2 + written = c2.read(buffer, 4);// reads across two buffers? 
+ BOOST_CHECK_EQUAL(written, 4); + std_buffer = std::string(reinterpret_cast(&buffer), written); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference5), 0, "Difference in content 2nd read"); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } - written = r_zstd.read(buffer, 4);// reads across two buffers? - BOOST_CHECK_EQUAL(written, 4); - std_buffer = std::string(reinterpret_cast(&buffer), written); - BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference5), 0, "Difference in content 2nd read"); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - written = r_zstd.read(buffer, 1024);// reads across two buffers? + written = r_flat.read((char*) &buffer[0], 1024);// reads across two buffers? BOOST_CHECK_EQUAL(written, 1024); std_buffer = std::string(reinterpret_cast(&buffer), written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference6), 0, "Difference in content 2nd read"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + + { // C1 + written = c1.read(buffer, 1024);// reads across two buffers? + BOOST_CHECK_EQUAL(written, 1024); + std_buffer = std::string(reinterpret_cast(&buffer), written); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference6), 0, "Difference in content 2nd read"); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } + + { // C2 + written = c2.read(buffer, 1024);// reads across two buffers? 
+ BOOST_CHECK_EQUAL(written, 1024); + std_buffer = std::string(reinterpret_cast(&buffer), written); + BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference6), 0, "Difference in content 2nd read"); + flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + } } } @@ -684,19 +749,31 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__new_style) { - ////chunked_reader_old r_flat = chunked_reader_old(fastafs_file.c_str()); - - chunked_reader c = chunked_reader(fasta_file.c_str()); - c.fopen(0); // open file handle and load buffer - - written = c.read(buffer, 10); - buffer[written] = '\0'; - - written = c.read(buffer, 100); - buffer[written] = '\0'; - - written = c.read(buffer, 100); - buffer[written] = '\0'; + chunked_reader c1 = chunked_reader(fastafs_file.c_str()); + c1.fopen(0); // open file handle and load buffer + + chunked_reader c2 = chunked_reader(fastafs_file_zstd.c_str()); + c2.fopen(0); // open file handle and load buffer + + { // C1 + written = c1.read(buffer, 1000); + buffer[written] = '\0'; + BOOST_CHECK_EQUAL(written, 1000); + + written = c1.read(buffer, 1000); + buffer[written] = '\0'; + BOOST_CHECK_EQUAL(written, 593); + } + + { // C2 + written = c2.read(buffer, 1000); + buffer[written] = '\0'; + BOOST_CHECK_EQUAL(written, 1000); + + written = c2.read(buffer, 1000); + buffer[written] = '\0'; + BOOST_CHECK_EQUAL(written, 593); + } } } diff --git a/test/fastafs/test_fastafs.cpp b/test/fastafs/test_fastafs.cpp index aded593d..53552e13 100644 --- a/test/fastafs/test_fastafs.cpp +++ b/test/fastafs/test_fastafs.cpp @@ -111,7 +111,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs_seq_fastafile_size_padding_0) char chunk[1]; std::string ref = ">chr1\nttttccccaaaagggg\n"; - + for(uint32_t i = 0; i < ref.size(); i++) { ret = fs.data[0]->view_fasta_chunk(cache_p0->sequences[0], chunk, 1, i, file); BOOST_CHECK_EQUAL(chunk[0], ref[i]); // test for '>' diff --git a/test/fastafs/test_ucsc2bit.cpp b/test/fastafs/test_ucsc2bit.cpp index 1309e2d0..63df271e 100644 --- 
a/test/fastafs/test_ucsc2bit.cpp +++ b/test/fastafs/test_ucsc2bit.cpp @@ -465,10 +465,8 @@ BOOST_AUTO_TEST_CASE(test_fastafs_view_chunked_2bit_with_offset) uint32_t complen; // voor lengte 1...(245-1) // voor i = 0, 245-lengte - for(complen = 1; complen < reference.size(); complen++) - { - for(uint32_t file_offset = 0; file_offset < reference.size() - complen - 1; file_offset++) - { + for(complen = 1; complen < reference.size(); complen++) { + for(uint32_t file_offset = 0; file_offset < reference.size() - complen - 1; file_offset++) { fs.view_ucsc2bit_chunk(buffer, complen, file_offset); BOOST_CHECK_EQUAL_MESSAGE(reference.compare(file_offset, complen, std_string_nullbyte_safe(buffer, 0, complen), 0, complen), 0, "Failed during len=" << complen << " and file offset=" << file_offset); } diff --git a/test/view/test_view.cpp b/test/view/test_view.cpp index 74bdd94c..280ab90d 100644 --- a/test/view/test_view.cpp +++ b/test/view/test_view.cpp @@ -146,7 +146,7 @@ BOOST_AUTO_TEST_CASE(test_fastafs_twobit_offset_calc) fasta_to_fastafs("test/data/test.fa", fastafs_file, false); fastafs fs = fastafs("test"); fs.load(fastafs_file); - + for(uint32_t i = 0 ; i <= 7; i++) { in_N = fs.data[1]->get_n_offset(i, &num_Ns); BOOST_CHECK_EQUAL(num_Ns, 0); From 5d37a2853f27065526b75bb1c8cc59217404f729 Mon Sep 17 00:00:00 2001 From: yhoogstrate Date: Sun, 22 Jan 2023 15:22:48 +0100 Subject: [PATCH 64/65] resovles test issues --- test/view/test_view.cpp | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/test/view/test_view.cpp b/test/view/test_view.cpp index 280ab90d..cc8fbe67 100644 --- a/test/view/test_view.cpp +++ b/test/view/test_view.cpp @@ -550,6 +550,7 @@ BOOST_AUTO_TEST_CASE(test_chunked_viewing_fourbit) // it can return less bytes than the buffer_size BOOST_AUTO_TEST_CASE(test_chunked_viewing_buffermaxlen) { +#if DEBUG printf("test %i\n", ++test_i); BOOST_REQUIRE_EQUAL(READ_BUFFER_SIZE, 4096);// required for this test @@ -576,6 +577,11 @@ 
BOOST_AUTO_TEST_CASE(test_chunked_viewing_buffermaxlen) delete[] buffer; delete cache_p0; + +#else + // for this test a small buffer size is needed, only used for debugging - therefore always test with debug on +#endif //DEBUG + } @@ -584,6 +590,8 @@ BOOST_AUTO_TEST_CASE(test_chunked_viewing_buffermaxlen) // it can return less bytes than the buffer_size BOOST_AUTO_TEST_CASE(test_chunked_viewing_buffermaxlen_lim) { +#if DEBUG + printf("test %i\n", ++test_i); BOOST_REQUIRE_EQUAL(READ_BUFFER_SIZE, 4096);// required for this test @@ -610,6 +618,11 @@ BOOST_AUTO_TEST_CASE(test_chunked_viewing_buffermaxlen_lim) delete[] buffer; delete cache_p0; + +#else + // for this test a small buffer size is needed, only used for debugging - therefore always test with debug on +#endif //DEBUG + } @@ -619,6 +632,8 @@ BOOST_AUTO_TEST_CASE(test_chunked_viewing_buffermaxlen_lim) // it can return less bytes than the buffer_size BOOST_AUTO_TEST_CASE(test_chunked_viewing_buffermaxlen2) { +#if DEBUG + printf("test %i\n", ++test_i); BOOST_REQUIRE_EQUAL(READ_BUFFER_SIZE, 4096);// required for this test @@ -652,6 +667,10 @@ BOOST_AUTO_TEST_CASE(test_chunked_viewing_buffermaxlen2) delete[] buffer; delete cache_p72; + +#else + // for this test a small buffer size is needed, only used for debugging - therefore always test with debug on +#endif //DEBUG } @@ -904,7 +923,7 @@ BOOST_AUTO_TEST_CASE(test_chunked_viewing2) uint32_t start_pos = 0; for(float i = 0.0; i <= 12.0; i += 1) { // perform limited subset of tests start_pos = (uint32_t)((i / 12.0) * (double) n); - printf(" - %uli / %zu\n", start_pos, n); + printf(" - %u / %zu\n", start_pos, n); for(uint32_t buffer_len = (uint32_t) full_file.size() - start_pos; buffer_len > 0; buffer_len--) { std::string substr_file = std::string(full_file, start_pos, buffer_len); From 97b6682d9afba1ad0f07d84db8bfcc8e5c2384b8 Mon Sep 17 00:00:00 2001 From: yhoogstrate Date: Sun, 22 Jan 2023 15:44:48 +0100 Subject: [PATCH 65/65] updates --- CMakeLists.txt | 2 +- 
Changelog | 7 + include/chunked_reader.hpp | 49 ---- src/chunked_reader.cpp | 249 ------------------- src/fuse.cpp | 4 +- test/chunked_reader/test_chunked_reader.cpp | 256 ++++++-------------- 6 files changed, 81 insertions(+), 486 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 495723b9..5f8a45f9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -9,7 +9,7 @@ project(fastafs) # Do this once in a while - find different compiler warnings #set(CMAKE_CXX_COMPILER "clang++") -set(PROJECT_VERSION "1.9.0") +set(PROJECT_VERSION "1.10.0") set(PACKAGE_URL "https://github.com/yhoogstrate/fastafs") set(PACKAGE_BUGREPORT "${PACKAGE_URL}/issues") diff --git a/Changelog b/Changelog index 25524c01..502ac8aa 100644 --- a/Changelog +++ b/Changelog @@ -1,3 +1,10 @@ +2023-01-22 + * v1.10.0 + * Better ninja/meson support + * Code clean-ups + * Restructured the chunked_reader class and subclasses according to + the design patterns philosophy + 2020-04-29 Youri Hoogstrate * v1.9.0 diff --git a/include/chunked_reader.hpp b/include/chunked_reader.hpp index d47b7533..5d9e615a 100644 --- a/include/chunked_reader.hpp +++ b/include/chunked_reader.hpp @@ -30,55 +30,6 @@ enum compression_type : signed char { // dit is State //url: https://refactoring.guru/design-patterns/state - -class chunked_reader_old // dit is Context -{ -private: - std::ifstream *fh_flat; - ZSTD_seekable_decompress_init_data* fh_zstd; - - std::string filename; // try doing this with inode - - compression_type filetype; - - char buffer[READ_BUFFER_SIZE + 1]; - size_t buffer_i; - size_t buffer_n; - - off_t file_i; - - -public: - chunked_reader_old(char *); // filename - chunked_reader_old(const char *); // filename - ~chunked_reader_old(); - - void init(); // generic tasks needed for init - - void update_flat_buffer(); - void update_zstd_buffer(); - - - void find_filetype(); - - void set_filetype(compression_type); - compression_type get_filetype() - { - return this->filetype ; - }; - - size_t read(char *, 
size_t);// @deprecate - size_t read(unsigned char *, size_t); - unsigned char read(); - - void seek(off_t); - size_t tell(); - //size_t size(); -}; - - - - class chunked_reader; class State diff --git a/src/chunked_reader.cpp b/src/chunked_reader.cpp index 6d138c79..5cada62b 100644 --- a/src/chunked_reader.cpp +++ b/src/chunked_reader.cpp @@ -3,255 +3,6 @@ -chunked_reader_old::chunked_reader_old(char * afilename) : - fh_flat(nullptr), fh_zstd(nullptr), buffer_i(0), buffer_n(0), file_i(0) -{ - this->filename = realpath_cpp(afilename); - this->init(); -} - -chunked_reader_old::chunked_reader_old(const char * afilename) : - fh_flat(nullptr), fh_zstd(nullptr), buffer_i(0), buffer_n(0), file_i(0) -{ - this->filename = realpath_cpp(afilename); - this->init(); -} - -chunked_reader_old::~chunked_reader_old() -{ - if(this->fh_flat != nullptr) { - if(this->fh_flat->is_open()) { - this->fh_flat->close(); - } - - delete this->fh_flat; - } - - if(this->fh_zstd != nullptr) { - //ZSTD_seekable_free(this->fh_zstd->seekable); - fclose_orDie(this->fh_zstd->fin); - - //delete this->fh_zstd->seekable; - //delete this->fh_zstd->fin; - - delete this->fh_zstd; - } -} - -void chunked_reader_old::init() -{ - this->find_filetype(); - - switch(this->filetype) { - - case uncompressed: - this->fh_flat = new std::ifstream; - this->fh_flat->open(this->filename.c_str(), std::ios::in | std::ios::binary | std::ios::ate); - - if(this->fh_flat->is_open()) { - this->fh_flat->seekg(0, std::ios::beg); - this->update_flat_buffer(); - } else { - throw std::runtime_error("[chunked_reader_old::init] Cannot open file for reading.\n"); - } - break; - - case zstd: - //printf("[chunked_reader_old::init()] - init ZSTD_seekable_decompress_init_data* fh_zstd; \n"); - this->fh_zstd = ZSTD_seekable_decompressFile_init(this->filename.c_str()); - // make zstd handle - to be implemented later on - //ZSTD_seekable_decompress_data - break; - - default: - throw std::runtime_error("[chunked_reader_old::init] Should never 
happen - but avoids compiler warning.\n"); - break; - } -} - -void chunked_reader_old::find_filetype() -{ - if(is_zstd_file((const char*) this->filename.c_str())) { - this->set_filetype(zstd); - } else { - this->set_filetype(uncompressed); - } -} - -void chunked_reader_old::set_filetype(compression_type arg_filetype) -{ - this->filetype = arg_filetype; -} - - -void set_filtetype(compression_type &filetype_arg); - - -size_t chunked_reader_old::read(char *arg_buffer, size_t buffer_size) -{ - buffer_size = std::min(buffer_size, (size_t) READ_BUFFER_SIZE); - size_t written = 0; - - while(this->buffer_i < this->buffer_n and written < buffer_size) { - arg_buffer[written++] = this->buffer[this->buffer_i++]; - } - - - - /* - size_t n = std::min(this->buffer_n - this->buffer_i, buffer_size - written); - memcpy(&arg_buffer[written], &this->buffer[this->buffer_i] , n); - written += n; - this->buffer_i += n; - */ - - if(written < buffer_size) { - // overwrite buffer - switch(this->filetype) { - case uncompressed: - this->update_flat_buffer(); - break; - case zstd: - this->update_zstd_buffer(); - break; - default: - throw std::runtime_error("[chunked_reader_old::read] reading from uninitialized object\n"); - break; - } - - // same loop again - while(this->buffer_i < this->buffer_n and written < buffer_size) { - arg_buffer[written++] = this->buffer[this->buffer_i++]; - } - /* - somehow memcpy is slightly slower - test again @ mom laptop - size_t n = std::min(this->buffer_n - this->buffer_i, buffer_size - written); - memcpy(&arg_buffer[written], &this->buffer[this->buffer_i] , n); - written += n; - this->buffer_i += n; - */ - } - - return written; -} - - - - - -size_t chunked_reader_old::read(unsigned char *arg_buffer, size_t buffer_size) -{ - - - buffer_size = std::min(buffer_size, (size_t) READ_BUFFER_SIZE); - size_t written = 0; - - while(this->buffer_i < this->buffer_n and written < buffer_size) { - arg_buffer[written++] = this->buffer[this->buffer_i++]; - } - - - if(written 
< buffer_size) { - // overwrite buffer - switch(this->filetype) { - case uncompressed: - this->update_flat_buffer(); - break; - case zstd: - this->update_zstd_buffer(); - break; - default: - throw std::runtime_error("[chunked_reader_old::read] reading from uninitialized object\n"); - break; - } - - // same loop again - while(this->buffer_i < this->buffer_n and written < buffer_size) { - arg_buffer[written++] = this->buffer[this->buffer_i++]; - } - } - - return written; -} - - - -// reads single byte from the buffer -unsigned char chunked_reader_old::read() -{ - if(this->buffer_i >= this->buffer_n) { - switch(this->filetype) { - case uncompressed: - this->update_flat_buffer(); - break; - case zstd: - this->update_zstd_buffer(); - break; - default: - throw std::runtime_error("[chunked_reader_old::read] reading from uninitialized object\n"); - break; - } - } - - return this->buffer[this->buffer_i++]; -} - - - - -void chunked_reader_old::update_flat_buffer() -{ - this->fh_flat->read(this->buffer, READ_BUFFER_SIZE); - - this->buffer_i = 0; - this->buffer_n = (size_t) this->fh_flat->gcount(); - this->file_i += this->buffer_n; -} - - -void chunked_reader_old::update_zstd_buffer() -{ - //size_t written = ZSTD_seekable_decompressFile_orDie(this->filename.c_str(), this->file_i, this->buffer, this->file_i + READ_BUFFER_SIZE); - size_t written = ZSTD_seekable_decompressFile_orDie(this->fh_zstd, this->file_i, this->buffer, this->file_i + READ_BUFFER_SIZE); - - this->buffer_i = 0; - this->buffer_n = written; - this->file_i += written; -} - - - -void chunked_reader_old::seek(off_t offset) -{ - this->file_i = offset; - - switch(this->filetype) { - case uncompressed: - this->fh_flat->clear(); // reset error state - - if(!this->fh_flat->is_open()) { - this->fh_flat->open(this->filename.c_str(), std::ios::in | std::ios::binary | std::ios::ate); - } - - this->fh_flat->seekg(offset, std::ios::beg); - this->update_flat_buffer(); - break; - default: - this->update_zstd_buffer(); - break; 
- } -} - - -size_t chunked_reader_old::tell() -{ - //@todo decide what to return when out of bound - //e.g. when exceeding file size - - return this->file_i - this->buffer_n + this->buffer_i; -} - - - void State::set_context(chunked_reader *arg_context) { this->context = arg_context; diff --git a/src/fuse.cpp b/src/fuse.cpp index 1b9c11a7..1a4dd8e8 100644 --- a/src/fuse.cpp +++ b/src/fuse.cpp @@ -168,7 +168,7 @@ static int do_getattr(const char *path, struct stat *st) -static int do_readdir(const char *path, void *buffer, fuse_fill_dir_t filler, off_t offset, struct fuse_file_info *fi) +static int do_readdir(const char *path, void *buffer, fuse_fill_dir_t filler, __attribute__((__unused__)) off_t offset, __attribute__((__unused__)) struct fuse_file_info *fi) { fuse_instance *ffi = static_cast(fuse_get_context()->private_data); @@ -272,7 +272,7 @@ static int do_open(const char *path, struct fuse_file_info *fi) return 0; } -static int do_flush(const char *path, struct fuse_file_info *fi) +static int do_flush(const char *path, __attribute__((__unused__)) struct fuse_file_info *fi) { return 0; } diff --git a/test/chunked_reader/test_chunked_reader.cpp b/test/chunked_reader/test_chunked_reader.cpp index dfcecc99..db8b11be 100644 --- a/test/chunked_reader/test_chunked_reader.cpp +++ b/test/chunked_reader/test_chunked_reader.cpp @@ -55,9 +55,6 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__small_file) std::string reference3 = "\x0a\x46\x53\x00"s; { - // old init - chunked_reader_old r_flat = chunked_reader_old(fastafs_file.c_str()); - // Context equivalent - uncompressed chunked_reader c1(fastafs_file.c_str()); c1.fopen(0); @@ -71,15 +68,6 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__small_file) BOOST_CHECK(c2.typeid_state() != typeid(ContextUncompressed)); - - BOOST_CHECK_EQUAL(r_flat.tell(), 0); - written = r_flat.read((char*) &buffer[0], 1024); - BOOST_CHECK_EQUAL(written, 403); - std_buffer = std::string(reinterpret_cast(&buffer), written); - 
BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference1), 0, "Difference in content"); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - BOOST_CHECK_EQUAL(r_flat.tell(), 403); - // Context equivalent - uncompressed { BOOST_CHECK_EQUAL(c1.tell(), 0); @@ -112,16 +100,6 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__small_file) } - // test what happens when file is closed (twice) - written = r_flat.read((char*) &buffer[0], 1024); - BOOST_CHECK_EQUAL(written, 0); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - BOOST_CHECK_EQUAL(r_flat.tell(), 403); - - written = r_flat.read((char*) &buffer[0], 1024); - BOOST_CHECK_EQUAL(written, 0); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - BOOST_CHECK_EQUAL(r_flat.tell(), 403); // Context equivalent - uncompressed { @@ -150,17 +128,8 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__small_file) } - // test seek stuff - BOOST_CHECK_EQUAL(r_flat.tell(), 403); - r_flat.seek(0); - BOOST_CHECK_EQUAL(r_flat.tell(), 0); - r_flat.seek(1); - BOOST_CHECK_EQUAL(r_flat.tell(), 1); - r_flat.seek(402); - BOOST_CHECK_EQUAL(r_flat.tell(), 402); - - // Context equivalent - uncompressed { + // Context equivalent - uncompressed BOOST_CHECK_EQUAL(c1.tell(), 403); c1.seek(0); BOOST_CHECK_EQUAL(c1.tell(), 0); @@ -169,9 +138,8 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__small_file) c1.seek(402); BOOST_CHECK_EQUAL(c1.tell(), 402); } - - // Context equivalent - compressed zstd { + // Context equivalent - compressed zstd BOOST_CHECK_EQUAL(c2.tell(), 403); c2.seek(0); BOOST_CHECK_EQUAL(c2.tell(), 0); @@ -182,16 +150,8 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__small_file) } - r_flat.seek(0); - written = r_flat.read((char*) &buffer[0], 4); - BOOST_CHECK_EQUAL(written, 4); - BOOST_CHECK_EQUAL(r_flat.tell(), 4); - std_buffer = std::string(reinterpret_cast(&buffer), written); - BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference2), 0, "Difference in content"); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - - // Context 
equivalent - uncompressed { + // Context equivalent - uncompressed c1.seek(0); BOOST_CHECK_EQUAL(c1.tell(), 0); written = c1.read(buffer, 4); @@ -202,10 +162,8 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__small_file) BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference2), 0, "Difference in content"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); } - - - // Context equivalent - compressed zstd { + // Context equivalent - compressed zstd c2.seek(0); BOOST_CHECK_EQUAL(c2.tell(), 0); written = c2.read(buffer, 4); @@ -218,18 +176,10 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__small_file) } - r_flat.seek(1); // reset to first pos in file - BOOST_CHECK_EQUAL(r_flat.tell(), 1); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - written = r_flat.read((char*) &buffer[0], 4); - BOOST_CHECK_EQUAL(written, 4); - BOOST_CHECK_EQUAL(r_flat.tell(), 5); - std_buffer = std::string(reinterpret_cast(&buffer), written); - BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference3), 0, "Difference in content"); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - // Context equivalent - uncompressed + { + // Context equivalent - uncompressed c1.seek(1); BOOST_CHECK_EQUAL(c1.tell(), 1); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); @@ -238,11 +188,17 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__small_file) BOOST_CHECK_EQUAL(c1.tell(), 5); std_buffer = std::string(reinterpret_cast(&buffer), written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference3), 0, "Difference in content"); + if(std_buffer.compare(reference3) != 0) { + printf("%u != %u\n", (unsigned int) reference3.size(), (unsigned int) std_buffer.size()); + printf("%s != %s\n", reference3.c_str(), std_buffer.c_str()); + + printf("[%u][%u][%u][%u]\n", (unsigned char) reference3[0], reference3[1], reference3[2], reference3[3]); + printf("[%u][%u][%u][%u]\n", (unsigned char) buffer[0], buffer[1], buffer[2], buffer[3]); + } flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); } - - // Context equivalent - 
compressed zstd { + // Context equivalent - compressed zstd c2.seek(1); BOOST_CHECK_EQUAL(c2.tell(), 1); @@ -259,29 +215,22 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__small_file) printf("[%u][%u][%u][%u]\n", (unsigned char) reference3[0], reference3[1], reference3[2], reference3[3]); printf("[%u][%u][%u][%u]\n", (unsigned char) buffer[0], buffer[1], buffer[2], buffer[3]); - //printf("[%c][%c][%c][%c]\n"); } flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); } - //@todo should trigger error!? - r_flat.seek(1024 * 1024); // trigger out of bound - - // Context equivalent - uncompressed { + // Context equivalent - uncompressed c1.seek(1024 * 1024); } - - // Context equivalent - compressed zstd { + // Context equivalent - compressed zstd c2.seek(1024 * 1024); } } { - chunked_reader_old r_zstd = chunked_reader_old(fastafs_file_zstd.c_str()); - // Context equivalent - uncompressed chunked_reader c1(fastafs_file.c_str()); c1.fopen(0); @@ -296,21 +245,16 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__small_file) - written = r_zstd.read(buffer, 1024); - BOOST_CHECK_EQUAL(written, 403); - std_buffer = std::string(reinterpret_cast(&buffer), written); - BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference1), 0, "Difference in content"); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - { + // C1 written = c1.read(buffer, 1024); BOOST_CHECK_EQUAL(written, 403); std_buffer = std::string(reinterpret_cast(&buffer), written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference1), 0, "Difference in content"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); } - { + // C2 written = c2.read(buffer, 1024); BOOST_CHECK_EQUAL(written, 403); std_buffer = std::string(reinterpret_cast(&buffer), written); @@ -318,61 +262,52 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__small_file) flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); } - written = r_zstd.read(buffer, 1024); - BOOST_CHECK_EQUAL(written, 0); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + { + // C1 written 
= c1.read(buffer, 1024); BOOST_CHECK_EQUAL(written, 0); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); } - { + // C2 written = c2.read(buffer, 1024); BOOST_CHECK_EQUAL(written, 0); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); } // test what happens when file is closed - written = r_zstd.read(buffer, 1024); - BOOST_CHECK_EQUAL(written, 0); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - { + // C1 written = c1.read(buffer, 1024); BOOST_CHECK_EQUAL(written, 0); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); } - { + // C2 written = c2.read(buffer, 1024); BOOST_CHECK_EQUAL(written, 0); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); } - // test seek stuff - r_zstd.seek(0); // reset to first pos in file - BOOST_REQUIRE_EQUAL(r_zstd.tell(), 0); + // test seek stuff { + // C1 c1.seek(0); // reset to first pos in file BOOST_REQUIRE_EQUAL(c1.tell(), 0); } - { + // C2 c2.seek(0); // reset to first pos in file BOOST_REQUIRE_EQUAL(c2.tell(), 0); } - written = r_zstd.read(buffer, 4); - BOOST_CHECK_EQUAL(written, 4); - BOOST_CHECK_EQUAL(r_zstd.tell(), 4); - std_buffer = std::string(reinterpret_cast(&buffer), written); - BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference2), 0, "Difference in content"); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + { + // C1 written = c1.read(buffer, 4); BOOST_CHECK_EQUAL(written, 4); BOOST_CHECK_EQUAL(c1.tell(), 4); @@ -380,8 +315,8 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__small_file) BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference2), 0, "Difference in content"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); } - { + // C2 written = c2.read(buffer, 4); BOOST_CHECK_EQUAL(written, 4); BOOST_CHECK_EQUAL(c2.tell(), 4); @@ -390,30 +325,25 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__small_file) flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); } - r_zstd.seek(1); // reset to first pos in file - BOOST_CHECK_EQUAL(r_zstd.tell(), 1); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + { + // C1 
c1.seek(1); // reset to first pos in file BOOST_CHECK_EQUAL(c1.tell(), 1); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); } - { + // C2 c2.seek(1); // reset to first pos in file BOOST_CHECK_EQUAL(c2.tell(), 1); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); } - written = r_zstd.read(buffer, 4); - BOOST_CHECK_EQUAL(written, 4); - BOOST_CHECK_EQUAL(r_zstd.tell(), 5); - std_buffer = std::string(reinterpret_cast(&buffer), written); - BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference3), 0, "Difference in content"); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + { + // C1 written = c1.read(buffer, 4); BOOST_CHECK_EQUAL(written, 4); BOOST_CHECK_EQUAL(c1.tell(), 5); @@ -421,8 +351,8 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__small_file) BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference3), 0, "Difference in content"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); } - { + // C2 written = c2.read(buffer, 4); BOOST_CHECK_EQUAL(written, 4); BOOST_CHECK_EQUAL(c2.tell(), 5); @@ -464,8 +394,6 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__large_file) { - chunked_reader_old r_flat = chunked_reader_old(fastafs_file.c_str()); - // Context equivalent - uncompressed chunked_reader c1(fastafs_file.c_str()); c1.fopen(0); @@ -479,13 +407,6 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__large_file) BOOST_CHECK(c2.typeid_state() != typeid(ContextUncompressed)); - - written = r_flat.read((char*) &buffer[0], 1024); - BOOST_CHECK_EQUAL(written, 1024); - std_buffer = std::string(reinterpret_cast(&buffer), written); - BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference1), 0, "Difference in content 1st read"); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - { // C1 written = c1.read(buffer, 1024); @@ -494,7 +415,6 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__large_file) BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference1), 0, "Difference in content 1st read"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); } - { // C2 written = 
c2.read(buffer, 1024); @@ -505,11 +425,6 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__large_file) } - written = r_flat.read((char*) &buffer[0], 1024); - BOOST_CHECK_EQUAL(written, 569); - std_buffer = std::string(reinterpret_cast(&buffer), written); - BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference2), 0, "Difference in content 2nd read"); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); { // C1 @@ -529,9 +444,7 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__large_file) flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); } - written = r_flat.read((char*) &buffer[0], 1024); - BOOST_CHECK_EQUAL(written, 0); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + { // C1 @@ -539,7 +452,6 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__large_file) BOOST_CHECK_EQUAL(written, 0); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); } - { // C2 written = c2.read(buffer, 1024); @@ -547,9 +459,7 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__large_file) flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); } - written = r_flat.read((char*) &buffer[0], 1024); - BOOST_CHECK_EQUAL(written, 0); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); + { // C1 @@ -557,7 +467,6 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__large_file) BOOST_CHECK_EQUAL(written, 0); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); } - { // C2 written = c2.read(buffer, 1024); @@ -566,35 +475,28 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__large_file) } - // set back - r_flat.seek(1024); - + // set back / seek { // C1 c1.seek(1024); } - { // C2 c2.seek(1024); } - written = r_flat.read((char*) &buffer[0], 1024); - BOOST_CHECK_EQUAL(written, 569); - std_buffer = std::string(reinterpret_cast(&buffer), written); - BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference2), 0, "Difference in content 2nd read"); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - { // C1 + { + // C1 written = c1.read(buffer, 1024); BOOST_CHECK_EQUAL(written, 569); std_buffer = 
std::string(reinterpret_cast(&buffer), written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference2), 0, "Difference in content 2nd read"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); } - - { // C2 + { + // C2 written = c2.read(buffer, 1024); BOOST_CHECK_EQUAL(written, 569); std_buffer = std::string(reinterpret_cast(&buffer), written); @@ -603,32 +505,28 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__large_file) } - // set back - r_flat.seek(4); - { // C1 + { + // C1 c1.seek(4); } - - { // C2 + { + // C2 c2.seek(4); } - written = r_flat.read((char*) &buffer[0], 1024);// reads across two buffers? - BOOST_CHECK_EQUAL(written, 1024); - std_buffer = std::string(reinterpret_cast(&buffer), written); - BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference3), 0, "Difference in content 2nd read"); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - { // C1 + + { + // C1 written = c1.read(buffer, 1024);// reads across two buffers? BOOST_CHECK_EQUAL(written, 1024); std_buffer = std::string(reinterpret_cast(&buffer), written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference3), 0, "Difference in content 2nd read"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); } - - { // C2 + { + // C2 written = c2.read(buffer, 1024);// reads across two buffers? BOOST_CHECK_EQUAL(written, 1024); std_buffer = std::string(reinterpret_cast(&buffer), written); @@ -637,21 +535,16 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__large_file) } - written = r_flat.read((char*) &buffer[0], 1024);// reads across two buffers? - BOOST_CHECK_EQUAL(written, 565); - std_buffer = std::string(reinterpret_cast(&buffer), written); - BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference4), 0, "Difference in content 2nd read"); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - - { // C1 + { + // C1 written = c1.read(buffer, 1024);// reads across two buffers? 
BOOST_CHECK_EQUAL(written, 565); std_buffer = std::string(reinterpret_cast(&buffer), written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference4), 0, "Difference in content 2nd read"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); } - - { // C2 + { + // C2 written = c2.read(buffer, 1024);// reads across two buffers? BOOST_CHECK_EQUAL(written, 565); std_buffer = std::string(reinterpret_cast(&buffer), written); @@ -660,32 +553,27 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__large_file) } - r_flat.seek(4); - - { // C1 + { + // C1 c1.seek(4); } - - { // C2 + { + // C2 c2.seek(4); } - written = r_flat.read((char*) &buffer[0], 4);// reads across two buffers? - BOOST_CHECK_EQUAL(written, 4); - std_buffer = std::string(reinterpret_cast(&buffer), written); - BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference5), 0, "Difference in content 2nd read"); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - { // C1 + { + // C1 written = c1.read(buffer, 4);// reads across two buffers? BOOST_CHECK_EQUAL(written, 4); std_buffer = std::string(reinterpret_cast(&buffer), written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference5), 0, "Difference in content 2nd read"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); } - - { // C2 + { + // C2 written = c2.read(buffer, 4);// reads across two buffers? BOOST_CHECK_EQUAL(written, 4); std_buffer = std::string(reinterpret_cast(&buffer), written); @@ -694,21 +582,17 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__large_file) } - written = r_flat.read((char*) &buffer[0], 1024);// reads across two buffers? - BOOST_CHECK_EQUAL(written, 1024); - std_buffer = std::string(reinterpret_cast(&buffer), written); - BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference6), 0, "Difference in content 2nd read"); - flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); - { // C1 + { + // C1 written = c1.read(buffer, 1024);// reads across two buffers? 
BOOST_CHECK_EQUAL(written, 1024); std_buffer = std::string(reinterpret_cast(&buffer), written); BOOST_CHECK_EQUAL_MESSAGE(std_buffer.compare(reference6), 0, "Difference in content 2nd read"); flush_buffer(buffer, READ_BUFFER_SIZE + 1, '\0'); } - - { // C2 + { + // C2 written = c2.read(buffer, 1024);// reads across two buffers? BOOST_CHECK_EQUAL(written, 1024); std_buffer = std::string(reinterpret_cast(&buffer), written); @@ -755,7 +639,8 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__new_style) chunked_reader c2 = chunked_reader(fastafs_file_zstd.c_str()); c2.fopen(0); // open file handle and load buffer - { // C1 + { + // C1 written = c1.read(buffer, 1000); buffer[written] = '\0'; BOOST_CHECK_EQUAL(written, 1000); @@ -765,7 +650,8 @@ BOOST_AUTO_TEST_CASE(test_chunked_reader_old__new_style) BOOST_CHECK_EQUAL(written, 593); } - { // C2 + { + // C2 written = c2.read(buffer, 1000); buffer[written] = '\0'; BOOST_CHECK_EQUAL(written, 1000);