Skip to content

Commit

Permalink
updates
Browse files Browse the repository at this point in the history
  • Loading branch information
yhoogstrate committed Jan 22, 2023
1 parent 2b8a596 commit 15065e5
Show file tree
Hide file tree
Showing 9 changed files with 254 additions and 201 deletions.
13 changes: 8 additions & 5 deletions include/chunked_reader.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,9 @@ class chunked_reader_old // dit is Context
ZSTD_seekable_decompress_init_data* fh_zstd;

std::string filename; // try doing this with inode

compression_type filetype;

char buffer[READ_BUFFER_SIZE + 1];
size_t buffer_i;
size_t buffer_n;
Expand All @@ -62,7 +62,10 @@ class chunked_reader_old // dit is Context
void find_filetype();

void set_filetype(compression_type);
/// Returns the compression type currently stored in this reader's `filetype` member.
compression_type get_filetype()
{
    return this->filetype;
}

size_t read(char *, size_t);// @deprecate
size_t read(unsigned char *, size_t);
Expand Down Expand Up @@ -113,7 +116,7 @@ class ContextZstdSeekable : public State
{
private:
ZSTD_seekable_decompress_init_data* fh = nullptr;

size_t const buffOutSize = ZSTD_DStreamOutSize();
char* const buffOut = (char*) malloc_orDie(buffOutSize);
ZSTD_seekable* const seekable = ZSTD_seekable_create(); //@todo -> in constructor, check if not NULL
Expand Down Expand Up @@ -147,7 +150,7 @@ class chunked_reader // master chunked_reader
void TransitionTo(State *); // @todo rename to set_compression_type
chunked_reader(const char *) ;
~chunked_reader();

State* find_state();
const std::type_info& typeid_state();

Expand Down
135 changes: 55 additions & 80 deletions src/chunked_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -259,30 +259,26 @@ void State::set_context(chunked_reader *arg_context)

// This does not read the actual flat file, this copies its internal buffer to arg_buffer_to
size_t State::read(unsigned char *arg_buffer_to, size_t arg_buffer_to_size,
size_t &buffer_i, size_t &buffer_n)
size_t &buffer_i, size_t &buffer_n)
{
#if DEBUG
if(arg_buffer_to_size > READ_BUFFER_SIZE)
{
if(arg_buffer_to_size > READ_BUFFER_SIZE) {
throw std::runtime_error("[ContextUncompressed::read] Requested buffer size larger than internal context buffer.\n");
}
#endif //DEBUG

size_t written = 0;
const size_t n1 = std::min(buffer_n - buffer_i, arg_buffer_to_size);// number of characters to copy

// copy current internal buffer completely
while(written < n1)
{
while(written < n1) {
arg_buffer_to[written++] = this->context->get_buffer()[buffer_i++];
}

if(written < arg_buffer_to_size)
{
if(written < arg_buffer_to_size) {
this->context->cache_buffer();// needs to set n to 0

while(buffer_i < buffer_n and written < arg_buffer_to_size)
{
while(buffer_i < buffer_n and written < arg_buffer_to_size) {
arg_buffer_to[written++] = this->context->get_buffer()[buffer_i++];
}
}
Expand Down Expand Up @@ -321,16 +317,11 @@ char * chunked_reader::get_buffer()
//@todo remove and use typeid only
/**
 * Translates the dynamic type of the active state object into the
 * compression_type enum.
 *
 * @return compression_type::uncompressed when the state is a ContextUncompressed,
 *         compression_type::zstd when it is a ContextZstdSeekable,
 *         compression_type::undefined for anything else.
 */
compression_type chunked_reader::get_filetype()
{
    // Query the state's type once and compare against each known context class.
    const auto& active_type = this->typeid_state();

    if(active_type == typeid(ContextUncompressed)) {
        return compression_type::uncompressed;
    }

    if(active_type == typeid(ContextZstdSeekable)) {
        return compression_type::zstd;
    }

    return compression_type::undefined;
}
Expand All @@ -345,36 +336,34 @@ size_t chunked_reader::cache_buffer()

this->buffer_i = 0;
this->file_i += s;

return s;
}

size_t chunked_reader::read(unsigned char *arg_buffer, size_t arg_buffer_size)
{
//arg_buffer_size = std::min(arg_buffer_size, (size_t) READ_BUFFER_SIZE);
#if DEBUG

if(arg_buffer == nullptr)
{

if(arg_buffer == nullptr) {
throw std::runtime_error("[chunked_reader::read] Invalid / not allocated buffer.\n");
}

if(arg_buffer_size > READ_BUFFER_SIZE)
{
if(arg_buffer_size > READ_BUFFER_SIZE) {
throw std::runtime_error("[chunked_reader::read] Requested buffer size larger than internal context buffer.\n");
}

#endif //DEBUG

return this->state->read(arg_buffer, arg_buffer_size, this->buffer_i, this->buffer_n);

}


void chunked_reader::TransitionTo(State *arg_state) {
void chunked_reader::TransitionTo(State *arg_state)
{

if(this->state != nullptr)
{
if(this->state != nullptr) {
delete this->state; // delete and destruct previous state, incl file points, should also run fh.close(); etc.
}

Expand All @@ -401,11 +390,11 @@ void chunked_reader::seek(off_t arg_offset)
/**
 * Reports the current read position, computed as
 * file_i - buffer_n + buffer_i.
 *
 * @return the computed position
 */
size_t chunked_reader::tell()
{
    // NOTE(review): presumably file_i - buffer_n is the file offset of the first
    // byte held in the internal buffer; confirm against cache_buffer().
    const auto buffer_origin = this->file_i - this->buffer_n;

    return buffer_origin + this->buffer_i;
}

Expand All @@ -424,51 +413,41 @@ const std::type_info& chunked_reader::typeid_state()

/**
 * Creates the state object that matches this reader's file.
 *
 * @return a newly allocated ContextZstdSeekable when is_zstd_file() accepts the
 *         filename, otherwise a newly allocated ContextUncompressed
 */
State *chunked_reader::find_state()
{
    // Anything that is not recognised as zstd is handled as an uncompressed file.
    if(!is_zstd_file(this->filename.c_str())) {
        return new ContextUncompressed;
    }

    return new ContextZstdSeekable;
}


/**
 * Opens the context's file as a binary input stream and positions it at
 * start_pos.
 *
 * The stream is opened with std::ios::ate and then repositioned through
 * this->seek(start_pos).
 *
 * @param start_pos  offset handed to seek() once the file is open
 * @throws std::runtime_error when a stream is already attached to this state,
 *         or when the file cannot be opened
 */
void ContextUncompressed::fopen(off_t start_pos = 0)
{
    if(this->fh != nullptr) {
        throw std::runtime_error("[ContextUncompressed::fopen] opening a non closed reader.\n");
    }

    // Plain `new` signals allocation failure by throwing std::bad_alloc, so the
    // pointer needs no null check afterwards.
    this->fh = new std::ifstream;
    this->fh->open(this->context->get_filename().c_str(), std::ios::in | std::ios::binary | std::ios::ate);

    if(!this->fh->is_open()) {
        // Release the unopened stream before reporting the failure; otherwise fh
        // stays non-null and every later fopen() is rejected as "non closed".
        delete this->fh;
        this->fh = nullptr;

        // NOTE(review): the message tag still names chunked_reader_old::init.
        throw std::runtime_error("[chunked_reader_old::init] Cannot open file for reading.\n");
    }

    this->seek(start_pos); // @todo move to top-level fopen()
}

size_t ContextUncompressed::cache_buffer()
{
#if DEBUG
if(this->fh->tellg() == -1)
{
if(this->fh->tellg() == -1) {
throw std::runtime_error("ContextUncompressed::cache_buffer\n");
}

if(this->context->get_buffer() == nullptr)
{

if(this->context->get_buffer() == nullptr) {
throw std::runtime_error("ContextUncompressed::cache_buffer - no valid buffer?\n");
}
#endif //DEBUG
Expand All @@ -492,16 +471,15 @@ size_t ContextUncompressed::cache_buffer()
this->fh->clear();
this->fh->seekg(0, std::ios::end);
}

return s;
}



void ContextUncompressed::seek(off_t arg_offset)
{
if(!this->fh->is_open())
{
if(!this->fh->is_open()) {
throw std::runtime_error("[ContextUncompressed::seek] unexpected closed filehandle found.\n");
}

Expand All @@ -512,11 +490,9 @@ void ContextUncompressed::seek(off_t arg_offset)

ContextUncompressed::~ContextUncompressed()
{
if(this->fh != nullptr)
{
if(this->fh != nullptr) {
this->fh->close();
if(!this->fh)
{
if(!this->fh) {
std::cerr << "[ContextUncompressed::~ContextUncompressed] unexpected closed filehandle found.\n";
}

Expand All @@ -531,20 +507,20 @@ size_t ContextZstdSeekable::cache_buffer()
{
//size_t written = ZSTD_seekable_decompressFile_orDie(this->fh_zstd, this->file_i, this->buffer, this->file_i + READ_BUFFER_SIZE);
//this->fh->read(this->context->get_buffer(), READ_BUFFER_SIZE);

// figure out the location in the decompressed file

size_t written = ZSTD_seekable_decompressFile_orDie(
this->fh,
this->context->get_file_i(), //this->context->file_i,
this->context->get_buffer(),
this->context->tell() + READ_BUFFER_SIZE //this->context->file_i + READ_BUFFER_SIZE
);
this->fh,
this->context->get_file_i(), //this->context->file_i,
this->context->get_buffer(),
this->context->tell() + READ_BUFFER_SIZE //this->context->file_i + READ_BUFFER_SIZE
);


//printf("written = %i\n", written);
//printf("{{%s}}\n", this->context->get_buffer());

/*
{
#if DEBUG
Expand All @@ -568,32 +544,31 @@ size_t ContextZstdSeekable::cache_buffer()
*/

//throw std::runtime_error("[ContextZstdSeekable::cache_buffer] not implemented.\n");

return written;
}

void ContextZstdSeekable::fopen(off_t start_pos)
{
if(this->fh != nullptr)
{
if(this->fh != nullptr) {
throw std::runtime_error("[ContextZstdSeekable::fopen] opening a non closed reader.\n");
}


this->fh = ZSTD_seekable_decompressFile_init(this->context->get_filename().c_str());


if((this->fh->fin == NULL) | feof(this->fh->fin))
{
if((this->fh->fin == NULL) | feof(this->fh->fin)) {
throw std::runtime_error("[ContextZstdSeekable::fopen] not implemented.\n");
}
else
{
} else {
fseek_orDie(this->fh->fin, start_pos, SEEK_SET);// set initial file handle to 0?
// this->fh->seekg(start_pos, std::ios::beg);

size_t const initResult = ZSTD_seekable_initFile(this->seekable, fh->fin);
if (ZSTD_isError(initResult)) { fprintf(stderr, "ZSTD_seekable_init() error : %s \n", ZSTD_getErrorName(initResult)); exit(11); }
if(ZSTD_isError(initResult)) {
fprintf(stderr, "ZSTD_seekable_init() error : %s \n", ZSTD_getErrorName(initResult));
exit(11);
}

//@todo class member?
this->maxFileSize = ZSTD_seekable_getFileDecompressedSize(this->seekable);
Expand All @@ -616,6 +591,6 @@ ContextZstdSeekable::~ContextZstdSeekable()

delete this->fh;
}

//throw std::runtime_error("[ContextUncompressed::~ContextUncompressed] not implemented.\n");
}
2 changes: 1 addition & 1 deletion src/fastafs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -641,7 +641,7 @@ void fastafs::load(std::string afilename)
{
std::streampos size;
unsigned char *memblock;

chunked_reader fh_in = chunked_reader(afilename.c_str());
{
fh_in.fopen(0);
Expand Down
2 changes: 1 addition & 1 deletion src/ucsc2bit_to_fastafs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ size_t ucsc2bit_to_fastafs(std::string ucsc2bit_file, std::string fastafs_file)
fh_fastafs << "\x00\x00\x00\x00"s;// position of metedata ~ unknown YET

// Read UCSC2bit header (n seq)
fh_ucsc2bit.read( (char*)( &buffer[0]) , 12);//conversion from unsigned char* to char* (https://stackoverflow.com/questions/604431/c-reading-unsigned-char-from-file-stream)
fh_ucsc2bit.read((char*)(&buffer[0]), 12); //conversion from unsigned char* to char* (https://stackoverflow.com/questions/604431/c-reading-unsigned-char-from-file-stream)
n = fourbytes_to_uint_ucsc2bit(buffer, 8);
uint_to_fourbytes(buffer, n);
std::vector<ucsc2bit_seq_header *> data(n);
Expand Down
2 changes: 1 addition & 1 deletion src/xbit_byte_encoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ void xbit_byte_encoder::next(chunked_reader &r)
unsigned char *buf = new unsigned char[2];
r.read(buf, 1);
this->data = buf[0];

delete[] buf;
}

Loading

0 comments on commit 15065e5

Please sign in to comment.