forked from maip/novo_muta
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcount_bin_trio.cc
95 lines (82 loc) · 2.66 KB
/
count_bin_trio.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
/**
* @file count_bin_trio.cc
* @author Melissa Ip
*
* This parses an input file, where each probability of mutation (as a float
* [0, 1]) is placed on a new line. The file can be generated by
* simulation_trio.cc.
*
* This creates 10 bins numbered 0-9 with probability categories at 10%
* intervals:
*
* BIN 0 1 2 ... 9
* % [0, 10), [10, 20), [20, 30), ..., [90, 100]
*
* This calculates the percentage of the sites in each bin out of the total
* number of sites. The digit in the tenths place of the probability represents
* the number of the bin it belongs to. A probability of 1.00 (100%) will go in
* the highest bin possible, bin 9. Negative probabilities are all grouped in a
* -1 bin.
*
* To compile on Herschel:
* c++ -std=c++11 -L/usr/local/lib -I/usr/local/include -o count_bin_trio utility.cc count_bin_trio.cc
*
* To run this file, provide the following command line inputs:
* ./count_bin_trio <input>.txt
*/
#include <fstream>
#include <sstream>
#include "utility.h"
const int kNumBins = 10;  // 10 bins cover 0-100% with 10% intervals.

/**
 * Returns count expressed as a percentage of total.
 *
 * @param  count Number of sites in the category of interest.
 * @param  total Number of sites read overall.
 * @return       count / total * 100, or 0 when total is not positive so that
 *               an empty input never produces a division by zero (NaN).
 */
static double PercentOf(int count, int total) {
  if (total <= 0) {
    return 0.0;
  }
  return (double) count / total * 100;
}

/**
 * Reads one probability per line from the file named by argv[1], sorts each
 * value into one of kNumBins equal-width bins (negative values go into a
 * separate -1 bin) and prints, for every bin, the share of sites it holds.
 * Also reports how many sites exceed a fixed probability cut of 0.1.
 *
 * Lines that do not start with a parsable number (for example blank lines)
 * are skipped and do not count towards the total.
 *
 * @param  argc Number of command line arguments; must be at least 2.
 * @param  argv argv[1] is the path of the input file.
 * @return      0 on completion.
 */
int main(int argc, const char *argv[]) {
  // NOTE(review): Die is not defined in this file; the code below relies on
  // it not returning (argv[1] is read right after the argc check).
  if (argc < 2) {
    Die("USAGE: count_bin_trio <input>.txt");
  }

  const string file_name = argv[1];
  ifstream f(file_name);
  if (!f.is_open() || f.fail()) {
    Die("Input file cannot be read.");
  }

  string line;
  int neg_bin = 0;                // Sites with a negative probability.
  int total = 0;                  // Sites successfully parsed.
  int counts[kNumBins] = {0};     // Sites per bin 0..kNumBins-1.
  int probability_count = 0;      // Above the probability cut.
  const double probability_cut = 0.1;

  // getline already strips the trailing '\n', so no extra cleanup is needed.
  while (getline(f, line)) {
    double probability = 0.0;
    stringstream str(line);
    if (!(str >> probability)) {
      continue;  // Not a number: do not let it masquerade as probability 0.
    }

    if (probability > probability_cut) {
      probability_count++;
    }

    // Classify on the double before converting to int: converting a hugely
    // negative value to int first would be out of range.
    const double scaled = floor(probability * kNumBins);
    if (scaled < 0) {  // ERROR: Negative probability.
      neg_bin++;
    } else {
      // Clamp so that a probability of 1.00 lands in the highest bin.
      const int bin = (int) fmin(scaled, kNumBins - 1);
      counts[bin]++;
    }
    total++;
  }
  f.close();

  printf("%.2f%% or %d/%d sites have a probability greater than %.2f.\n",
         PercentOf(probability_count, total), probability_count, total,
         probability_cut);

  if (neg_bin > 0) {
    printf("%.2f%% or %d/%d sites in bin %d.\n",
           PercentOf(neg_bin, total), neg_bin, total,
           -1);  // All negative bins are grouped in -1.
  }

  for (int i = 0; i < kNumBins; i++) {
    if (counts[i] > 0) {
      printf("%.2f%% or %d/%d sites in bin %d.\n",
             PercentOf(counts[i], total), counts[i], total, i);
    } else {
      printf("There are no sites in bin %d.\n", i);
    }
  }

  return 0;
}