Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor low-level parsing and hashing #29

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 3 additions & 5 deletions goetia/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,11 @@
from goetia import libgoetia
from goetia.utils import check_trait

typenames = [(t, t.__name__.replace(' ', '')) for t in [libgoetia.SparseppSetStorage,
libgoetia.PHMapStorage,
typenames = [(t, t.__name__.replace(' ', '')) for t in [libgoetia.PHMapStorage,
libgoetia.BitStorage,
libgoetia.ByteStorage,
libgoetia.NibbleStorage,
libgoetia.QFStorage,
libgoetia.BTreeStorage]]
libgoetia.QFStorage]]

types = [_type for _type, _name in typenames]

Expand All @@ -29,7 +27,7 @@
StorageTraits = libgoetia.StorageTraits


def get_storage_args(parser, default='SparseppSetStorage',
def get_storage_args(parser, default='PHMapStorage',
group_name='storage'):
if 'storage' in [g.title for g in parser._action_groups]:
return None
Expand Down
5 changes: 4 additions & 1 deletion include/goetia/cdbg/cdbg.hh
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#pragma GCC diagnostic pop

#include "goetia/goetia.hh"
#include "goetia/errors.hh"
#include "goetia/metrics.hh"
#include "goetia/traversal/unitig_walker.hh"
#include "goetia/hashing/kmeriterator.hh"
Expand Down Expand Up @@ -625,7 +626,7 @@ public:
write_gfa1(out);
break;
default:
throw GoetiaException("Invalid cDBG format.");
throw std::invalid_argument("Invalid cDBG format.");
};
}

Expand Down Expand Up @@ -733,8 +734,10 @@ extern template class goetia::cDBG<goetia::dBG<goetia::BitStorage, goetia::CanLe
extern template class goetia::cDBG<goetia::dBG<goetia::PHMapStorage, goetia::FwdLemireShifter>>;
extern template class goetia::cDBG<goetia::dBG<goetia::PHMapStorage, goetia::CanLemireShifter>>;

/*
extern template class goetia::cDBG<goetia::dBG<goetia::SparseppSetStorage, goetia::FwdLemireShifter>>;
extern template class goetia::cDBG<goetia::dBG<goetia::SparseppSetStorage, goetia::CanLemireShifter>>;
*/

}

Expand Down
9 changes: 5 additions & 4 deletions include/goetia/cdbg/compactor.hh
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include <assert.h>
#include <cstdint>

#include "goetia/errors.hh"
#include "goetia/traversal/unitig_walker.hh"
#include "goetia/hashing/kmeriterator.hh"
#include "goetia/dbg.hh"
Expand Down Expand Up @@ -1037,13 +1038,13 @@ struct StreamingCompactor<GraphType<StorageType, ShifterType>> {

try {
compactor->insert_sequence(read.sequence);
} catch (InvalidCharacterException &e) {
} catch (InvalidSequence& e) {
std::cerr << "WARNING: Bad sequence encountered at "
<< this->_n_reads << ": "
<< read.sequence << ", exception was "
<< e.what() << std::endl;
return 0;
} catch (SequenceLengthException &e) {
} catch (SequenceTooShort& e) {
std::cerr << "NOTE: Skipped sequence that was too short: read "
<< this->_n_reads << " with sequence "
<< read.sequence
Expand Down Expand Up @@ -1159,14 +1160,14 @@ struct StreamingCompactor<GraphType<StorageType, ShifterType>> {

}

extern template class goetia::StreamingCompactor<goetia::dBG<goetia::SparseppSetStorage, goetia::FwdLemireShifter>>;
//extern template class goetia::StreamingCompactor<goetia::dBG<goetia::SparseppSetStorage, goetia::FwdLemireShifter>>;
extern template class goetia::StreamingCompactor<goetia::dBG<goetia::PHMapStorage, goetia::FwdLemireShifter>>;
// extern template class goetia::StreamingCompactor<goetia::dBG<goetia::BitStorage, goetia::FwdLemireShifter>>;
// extern template class goetia::StreamingCompactor<goetia::dBG<goetia::ByteStorage, goetia::FwdLemireShifter>>;
// extern template class goetia::StreamingCompactor<goetia::dBG<goetia::NibbleStorage, goetia::FwdLemireShifter>>;
// extern template class goetia::StreamingCompactor<goetia::dBG<goetia::QFStorage, goetia::FwdLemireShifter>>;

extern template class std::deque<goetia::StreamingCompactor<goetia::dBG<goetia::SparseppSetStorage, goetia::FwdLemireShifter>>>;
//extern template class std::deque<goetia::StreamingCompactor<goetia::dBG<goetia::SparseppSetStorage, goetia::FwdLemireShifter>>>;
extern template class std::deque<goetia::StreamingCompactor<goetia::dBG<goetia::PHMapStorage, goetia::FwdLemireShifter>>>;


Expand Down
6 changes: 3 additions & 3 deletions include/goetia/cdbg/saturating_compactor.hh
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@
#ifndef GOETIA_SATURATING_CPTOR
#define GOETIA_SATURATING_CPTOR

#include "goetia/errors.hh"
#include "goetia/processors.hh"
#include "goetia/parsing/readers.hh"
#include "goetia/sequences/exceptions.hh"

namespace goetia {

Expand Down Expand Up @@ -54,13 +54,13 @@ struct SaturatingCompactor {
try {
compactor->insert_sequence(read.sequence);
signature->insert_sequence(read.sequence);
} catch (InvalidCharacterException &e) {
} catch (InvalidSequence& e) {
std::cerr << "WARNING: Bad sequence encountered at "
<< this->_n_reads << ": "
<< read.sequence << ", exception was "
<< e.what() << std::endl;
return 0;
} catch (SequenceLengthException &e) {
} catch (SequenceTooShort& e) {
std::cerr << "NOTE: Skipped sequence that was too short: read "
<< this->_n_reads << " with sequence "
<< read.sequence
Expand Down
17 changes: 13 additions & 4 deletions include/goetia/dbg.hh
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,13 @@
#define GOETIA_DBG_HH

#include "goetia/meta.hh"
#include "goetia/errors.hh"
#include "goetia/hashing/kmeriterator.hh"
#include "goetia/processors.hh"
#include "goetia/storage/storage.hh"
#include "goetia/storage/storage_types.hh"
#include "goetia/hashing/rollinghashshifter.hh"
#include "goetia/hashing/ukhs.hh"
#include "goetia/sequences/exceptions.hh"
#include "goetia/traversal/unitig_walker.hh"

#include <algorithm>
Expand Down Expand Up @@ -513,10 +513,12 @@ extern template class goetia::dBG<goetia::BitStorage, goetia::CanLemireShifter>;
extern template class goetia::dBG<goetia::BitStorage, goetia::FwdUnikmerShifter>;
extern template class goetia::dBG<goetia::BitStorage, goetia::CanUnikmerShifter>;

/*
extern template class goetia::dBG<goetia::SparseppSetStorage, goetia::FwdLemireShifter>;
extern template class goetia::dBG<goetia::SparseppSetStorage, goetia::CanLemireShifter>;
extern template class goetia::dBG<goetia::SparseppSetStorage, goetia::FwdUnikmerShifter>;
extern template class goetia::dBG<goetia::SparseppSetStorage, goetia::CanUnikmerShifter>;
*/

extern template class goetia::dBG<goetia::ByteStorage, goetia::FwdLemireShifter>;
extern template class goetia::dBG<goetia::ByteStorage, goetia::CanLemireShifter>;
Expand All @@ -538,20 +540,24 @@ extern template class goetia::dBG<goetia::PHMapStorage, goetia::CanLemireShifter
extern template class goetia::dBG<goetia::PHMapStorage, goetia::FwdUnikmerShifter>;
extern template class goetia::dBG<goetia::PHMapStorage, goetia::CanUnikmerShifter>;

/*
extern template class goetia::dBG<goetia::BTreeStorage, goetia::FwdLemireShifter>;
extern template class goetia::dBG<goetia::BTreeStorage, goetia::CanLemireShifter>;
extern template class goetia::dBG<goetia::BTreeStorage, goetia::FwdUnikmerShifter>;
extern template class goetia::dBG<goetia::BTreeStorage, goetia::CanUnikmerShifter>;
*/

extern template class goetia::UnitigWalker<goetia::dBG<goetia::BitStorage, goetia::FwdLemireShifter>>;
extern template class goetia::UnitigWalker<goetia::dBG<goetia::BitStorage, goetia::CanLemireShifter>>;
extern template class goetia::UnitigWalker<goetia::dBG<goetia::BitStorage, goetia::FwdUnikmerShifter>>;
extern template class goetia::UnitigWalker<goetia::dBG<goetia::BitStorage, goetia::CanUnikmerShifter>>;

/*
extern template class goetia::UnitigWalker<goetia::dBG<goetia::SparseppSetStorage, goetia::FwdLemireShifter>>;
extern template class goetia::UnitigWalker<goetia::dBG<goetia::SparseppSetStorage, goetia::CanLemireShifter>>;
extern template class goetia::UnitigWalker<goetia::dBG<goetia::SparseppSetStorage, goetia::FwdUnikmerShifter>>;
extern template class goetia::UnitigWalker<goetia::dBG<goetia::SparseppSetStorage, goetia::CanUnikmerShifter>>;
*/

extern template class goetia::UnitigWalker<goetia::dBG<goetia::ByteStorage, goetia::FwdLemireShifter>>;
extern template class goetia::UnitigWalker<goetia::dBG<goetia::ByteStorage, goetia::CanLemireShifter>>;
Expand All @@ -573,21 +579,24 @@ extern template class goetia::UnitigWalker<goetia::dBG<goetia::PHMapStorage, goe
extern template class goetia::UnitigWalker<goetia::dBG<goetia::PHMapStorage, goetia::FwdUnikmerShifter>>;
extern template class goetia::UnitigWalker<goetia::dBG<goetia::PHMapStorage, goetia::CanUnikmerShifter>>;

/*
extern template class goetia::UnitigWalker<goetia::dBG<goetia::BTreeStorage, goetia::FwdLemireShifter>>;
extern template class goetia::UnitigWalker<goetia::dBG<goetia::BTreeStorage, goetia::CanLemireShifter>>;
extern template class goetia::UnitigWalker<goetia::dBG<goetia::BTreeStorage, goetia::FwdUnikmerShifter>>;
extern template class goetia::UnitigWalker<goetia::dBG<goetia::BTreeStorage, goetia::CanUnikmerShifter>>;
*/

extern template class goetia::KmerIterator<goetia::dBG<goetia::BitStorage, goetia::FwdLemireShifter>>;
extern template class goetia::KmerIterator<goetia::dBG<goetia::BitStorage, goetia::CanLemireShifter>>;
extern template class goetia::KmerIterator<goetia::dBG<goetia::BitStorage, goetia::FwdUnikmerShifter>>;
extern template class goetia::KmerIterator<goetia::dBG<goetia::BitStorage, goetia::CanUnikmerShifter>>;

/*
extern template class goetia::KmerIterator<goetia::dBG<goetia::SparseppSetStorage, goetia::FwdLemireShifter>>;
extern template class goetia::KmerIterator<goetia::dBG<goetia::SparseppSetStorage, goetia::CanLemireShifter>>;
extern template class goetia::KmerIterator<goetia::dBG<goetia::SparseppSetStorage, goetia::FwdUnikmerShifter>>;
extern template class goetia::KmerIterator<goetia::dBG<goetia::SparseppSetStorage, goetia::CanUnikmerShifter>>;

*/
extern template class goetia::KmerIterator<goetia::dBG<goetia::ByteStorage, goetia::FwdLemireShifter>>;
extern template class goetia::KmerIterator<goetia::dBG<goetia::ByteStorage, goetia::CanLemireShifter>>;
extern template class goetia::KmerIterator<goetia::dBG<goetia::ByteStorage, goetia::FwdUnikmerShifter>>;
Expand All @@ -608,11 +617,11 @@ extern template class goetia::KmerIterator<goetia::dBG<goetia::PHMapStorage, goe
extern template class goetia::KmerIterator<goetia::dBG<goetia::PHMapStorage, goetia::FwdUnikmerShifter>>;
extern template class goetia::KmerIterator<goetia::dBG<goetia::PHMapStorage, goetia::CanUnikmerShifter>>;

/*
extern template class goetia::KmerIterator<goetia::dBG<goetia::BTreeStorage, goetia::FwdLemireShifter>>;
extern template class goetia::KmerIterator<goetia::dBG<goetia::BTreeStorage, goetia::CanLemireShifter>>;
extern template class goetia::KmerIterator<goetia::dBG<goetia::BTreeStorage, goetia::FwdUnikmerShifter>>;
extern template class goetia::KmerIterator<goetia::dBG<goetia::BTreeStorage, goetia::CanUnikmerShifter>>;


*/

#endif
103 changes: 103 additions & 0 deletions include/goetia/errors.hh
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
/**
* (c) Camille Scott, 2021
* File : errors.hh
* License: MIT
* Author : Camille Scott <[email protected]>
* Date : 04.04.2022
*/

#ifndef GOETIA_ERRORS_HH
#define GOETIA_ERRORS_HH

#include <cstddef>
#include <cstdint>
#include <exception>
#include <stdexcept>
#include <string>

/**
 * Root of the goetia exception hierarchy.
 *
 * Holds a human-readable description that what() returns, so a handler that
 * catches by (base) reference and logs e.what() gets a message describing the
 * actual failure rather than the standard library's generic default text.
 */
struct GoetiaBaseException : public std::exception {
    /** Builds an exception with a generic description. */
    GoetiaBaseException()
        : _what("goetia error")
    {}

    /** Builds an exception whose what() returns a copy of @p what. */
    explicit GoetiaBaseException(const std::string& what)
        : _what(what)
    {}

    /** @return the description given at construction; valid for the lifetime of this object. */
    const char* what() const noexcept override {
        return _what.c_str();
    }

private:
    std::string _what;
};

/**
 * Reports an invalid sequence.
 *
 * Carries the read number and a copy of the sequence. what() mentions only the
 * sequence length: the sequence itself may be long and stays available
 * through the `sequence` member.
 */
struct InvalidSequence : public GoetiaBaseException {
    std::size_t read_number;
    const std::string sequence;

    // Inside the initializer list the names refer to the constructor
    // parameters, so the message is built before the members exist.
    InvalidSequence(std::size_t read_number, const std::string& sequence)
        : GoetiaBaseException("Invalid sequence at read " + std::to_string(read_number)
                              + " (length " + std::to_string(sequence.size()) + ")"),
          read_number(read_number), sequence(sequence)
    {}
};

/**
 * Reports an invalid record, carrying the read number and a copy of the
 * record's sequence.
 */
struct InvalidRecord : public GoetiaBaseException {
    std::size_t read_number;
    const std::string sequence;

    InvalidRecord(std::size_t read_number, const std::string& sequence)
        : GoetiaBaseException("Invalid record at read " + std::to_string(read_number)
                              + " (length " + std::to_string(sequence.size()) + ")"),
          read_number(read_number), sequence(sequence)
    {}
};


/**
 * Reports an invalid pair of records, carrying the pair number and the names
 * of the left and right records.
 */
struct InvalidRecordPair : public GoetiaBaseException {
    std::size_t pair_number;
    const std::string name_left, name_right;

    InvalidRecordPair(std::size_t number, const std::string& name_left, const std::string& name_right)
        : GoetiaBaseException("Invalid record pair " + std::to_string(number)
                              + ": left '" + name_left + "', right '" + name_right + "'"),
          pair_number(number), name_left(name_left), name_right(name_right)
    {}
};


/** Signals the end of a stream; carries no payload. */
struct EndOfStream : public GoetiaBaseException {
    EndOfStream()
        : GoetiaBaseException("End of stream")
    {}
};

/** Reports an invalid stream; what() returns the caller-supplied message. */
struct InvalidStream : public GoetiaBaseException {
    const std::string message;

    InvalidStream(const std::string& message)
        : GoetiaBaseException(message), message(message)
    {}
};

/** Reports an invalid paired stream; carries no payload. */
struct InvalidPairedStream : public GoetiaBaseException {
    InvalidPairedStream()
        : GoetiaBaseException("Invalid paired stream")
    {}
};

/**
 * Reports a failure while reading from a stream, carrying the read number and
 * the name of the file involved.
 */
struct StreamReadError : public GoetiaBaseException {
    std::size_t read_number;
    const std::string filename;

    StreamReadError(std::size_t read_number, const std::string& filename)
        : GoetiaBaseException("Stream read error at read " + std::to_string(read_number)
                              + " in '" + filename + "'"),
          read_number(read_number), filename(filename)
    {}
};


/**
 * Reports a sequence that is too short, carrying a copy of the sequence.
 * what() mentions the sequence length rather than repeating the sequence.
 */
struct SequenceTooShort : public GoetiaBaseException {
    const std::string sequence;

    SequenceTooShort(const std::string& sequence)
        : GoetiaBaseException("Sequence too short (length "
                              + std::to_string(sequence.size()) + ")"),
          sequence(sequence)
    {}
};


/** Reports use of an uninitialized shifter; carries no payload. */
struct UninitializedShifter : public GoetiaBaseException {
    UninitializedShifter()
        : GoetiaBaseException("Uninitialized shifter")
    {}
};

/** Reports functionality that is not implemented; carries no payload. */
struct NotImplemented : public GoetiaBaseException {
    NotImplemented()
        : GoetiaBaseException("Not implemented")
    {}
};

/** Reports an invalid partition, carrying the offending partition value. */
struct InvalidPartition : public GoetiaBaseException {
    std::uint64_t partition;

    InvalidPartition(std::uint64_t partition)
        : GoetiaBaseException("Invalid partition: " + std::to_string(partition)),
          partition(partition)
    {}
};

/** Reports a deserialization failure; what() returns the caller-supplied message. */
struct DeserializationError : public GoetiaBaseException {
    const std::string message;

    DeserializationError(const std::string& message)
        : GoetiaBaseException(message), message(message)
    {}
};

/** Reports a serialization failure; what() returns the caller-supplied message. */
struct SerializationError : public GoetiaBaseException {
    const std::string message;

    SerializationError(const std::string& message)
        : GoetiaBaseException(message), message(message)
    {}
};


#endif
Loading