forked from samuell/gccontent-benchmark
-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.rs
147 lines (126 loc) · 5.09 KB
/
main.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
#![feature(core_intrinsics)]
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
use std::fs::File;
use std::io::{BufRead, BufReader};
use std::intrinsics::unlikely;
const CHUNKSIZE: usize = 32;
const BUFSIZE: usize = 4096;
/// Sum each adjacent 8bit lane into u64 x 4
#[inline]
unsafe fn hsum_epu8_epu64(v: __m256i) -> __m256i {
_mm256_sad_epu8(v, _mm256_setzero_si256())
}
/// Sum u64 x 4 -> i64
#[inline]
unsafe fn hsum_epu64_scalar(v: __m256i) -> i64 {
let lo = _mm256_castsi256_si128(v); // get the lower 8 u16s
let hi = _mm256_extracti128_si256(v, 1); // get the upper 8 u16
let sum2x64 = _mm_add_epi64(lo, hi); // narrow to 128
let hi = _mm_unpackhi_epi64(sum2x64, sum2x64);
let sum = _mm_add_epi64(hi, sum2x64); // narrow to 64
_mm_cvtsi128_si64(sum)
}
/// Count AT / GT occurances in buffer.
/// Relies on the fact that `G|C` & 10 == 2, `A|T` & 10 == 0, and `N` & 10 == 10.
/// Ideally the buffershould be a multiple of CHUNKSIZE
#[inline]
fn count_gc_at(buffer: &[u8]) -> (i64, i64) {
let mut gc = 0;
let mut at = 0;
unsafe {
// Initialize counters
let mut gc_sums_64 = _mm256_setzero_si256();
let mut at_sums_64 = _mm256_setzero_si256();
let mut gc_sums_8 = _mm256_setzero_si256();
let mut at_sums_8 = _mm256_setzero_si256();
let zeros = _mm256_setzero_si256();
let twos = _mm256_set1_epi8(2);
let tens = _mm256_set1_epi8(10);
// Create a super chunker of known size so that inner loop can unroll
let mut superchunker = buffer.chunks_exact(CHUNKSIZE * 16);
let mut counter = 0;
while let Some(superchunk) = &superchunker.next() {
let chunker = superchunk.chunks_exact(CHUNKSIZE);
for chunk in chunker {
let v = _mm256_loadu_si256(chunk as *const _ as *const __m256i);
let bit_and = _mm256_and_si256(v, tens);
gc_sums_8 = _mm256_sub_epi8(gc_sums_8, _mm256_cmpeq_epi8(bit_and, twos));
at_sums_8 = _mm256_sub_epi8(at_sums_8, _mm256_cmpeq_epi8(bit_and, zeros));
// Only sum when we get close to overflowing
counter += 1;
if counter == 255 {
gc_sums_64 = _mm256_add_epi64(gc_sums_64, hsum_epu8_epu64(gc_sums_8));
at_sums_64 = _mm256_add_epi64(at_sums_64, hsum_epu8_epu64(at_sums_8));
gc_sums_8 = _mm256_setzero_si256();
at_sums_8 = _mm256_setzero_si256();
counter = 0;
}
}
}
// Work out the reminder post super chunking
let mut chunker = superchunker.remainder().chunks_exact(CHUNKSIZE);
let mut counter = 0;
while let Some(chunk) = chunker.next() {
let v = _mm256_loadu_si256(chunk as *const _ as *const __m256i);
let bit_and = _mm256_and_si256(v, tens);
gc_sums_8 = _mm256_sub_epi8(gc_sums_8, _mm256_cmpeq_epi8(bit_and, twos));
at_sums_8 = _mm256_sub_epi8(at_sums_8, _mm256_cmpeq_epi8(bit_and, zeros));
// Only sum when we get close to overflowing
counter += 1;
if counter == 255 {
gc_sums_64 = _mm256_add_epi64(gc_sums_64, hsum_epu8_epu64(gc_sums_8));
at_sums_64 = _mm256_add_epi64(at_sums_64, hsum_epu8_epu64(at_sums_8));
gc_sums_8 = _mm256_setzero_si256();
at_sums_8 = _mm256_setzero_si256();
counter = 0;
}
}
// horizontal sum the counts
gc_sums_64 = _mm256_add_epi64(gc_sums_64, hsum_epu8_epu64(gc_sums_8));
at_sums_64 = _mm256_add_epi64(at_sums_64, hsum_epu8_epu64(at_sums_8));
gc += hsum_epu64_scalar(gc_sums_64);
at += hsum_epu64_scalar(at_sums_64);
// Finally sum up an remaining bytes
for c in chunker.remainder() {
gc += (*c & 10 == 2) as i64;
at += (*c & 10 == 0) as i64;
}
}
(gc, at)
}
fn main() {
let filename = "chry_multiplied.fa";
let file = File::open(filename).unwrap();
let mut reader = BufReader::new(file);
let mut line = Vec::with_capacity(BUFSIZE);
let mut at = 0;
let mut gc = 0;
loop {
if unlikely(reader.read_until(b'\n', &mut line).expect("error reading") == 0) {
break;
}
if unlikely(line[0] == b'>') {
line.clear();
continue;
}
// Try to read more lines
let buffer = loop {
let line_len = line.len();
let bytes_read = reader.read_until(b'\n', &mut line).expect("error reading");
if bytes_read > 0 && line[line_len] == b'>' {
break &line[0..line_len];
} else if bytes_read == 0 || line.len() >= BUFSIZE {
break &line;
}
};
let (gc_buf, at_buf) = count_gc_at(&buffer);
gc += gc_buf;
at += at_buf;
line.clear();
}
let gc_ratio: f64 = gc as f64 / (gc as f64 + at as f64);
println!("GC ratio (gc/(gc+at)): {}", gc_ratio);
}