-
Notifications
You must be signed in to change notification settings - Fork 10
/
Kmer.hpp
101 lines (63 loc) · 1.66 KB
/
Kmer.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
#ifndef BFG_KMER_HPP
#define BFG_KMER_HPP
#ifndef MAX_KMER_SIZE
#define MAX_KMER_SIZE 32
#endif
#include <stdio.h>
#include <stdint.h>
#include <cassert>
#include <cstring>
#include <string>
#include "hash.hpp"
/* Short description:
 * - Stores kmer strings using 2 bits per base instead of 8
 * - Easily returns reverse complements of kmers, e.g. TTGG -> CCAA
 * - Easily compares kmers
 * - Provides a hash of kmers
 * - Gets the next and previous kmer, e.g. ACGT -> CGTT or ACGT -> AACG
 * */
class Kmer {
public:
Kmer();
Kmer(const Kmer& o);
explicit Kmer(const char *s);
Kmer& operator=(const Kmer& o);
void set_deleted();
bool operator<(const Kmer& o) const;
bool operator==(const Kmer& o) const;
bool operator!=(const Kmer& o) const {
return !(*this == o);
}
void set_kmer(const char *s);
uint64_t hash() const;
Kmer twin() const;
Kmer rep() const;
Kmer getLink(const size_t index) const;
Kmer forwardBase(const char b) const;
Kmer backwardBase(const char b) const;
std::string getBinary() const;
void toString(char * s) const;
std::string toString() const;
// static functions
static void set_k(unsigned int _k);
static const unsigned int MAX_K = MAX_KMER_SIZE;
static unsigned int k;
private:
static unsigned int k_bytes;
static unsigned int k_longs;
static unsigned int k_modmask; // int?
// data fields
union {
uint8_t bytes[MAX_K/4];
uint64_t longs[MAX_K/32];
};
// private functions
//void shiftForward(int shift);
//void shiftBackward(int shift);
};
// Hash functor for Kmer keys: forwards to Kmer::hash() and hands the result
// back as size_t, the type the call operator is declared to return.
struct KmerHash {
  size_t operator()(const Kmer &km) const
  {
    // Kmer::hash() is declared to return uint64_t; make the conversion to
    // the functor's return type explicit.
    return static_cast<size_t>(km.hash());
  }
};
#endif // BFG_KMER_HPP