-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing 5 changed files with 172 additions and 50 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
#include <stdio.h> | ||
#include <stdlib.h> | ||
#include <string.h> | ||
#include <zlib.h> | ||
|
||
#define MAX_LINE 1000000 | ||
#define NUM_BINS 16 | ||
|
||
// Function to determine the bin for a given length | ||
int get_bin(int length) { | ||
int bins[] = {10, 100, 1000, 2500, 5000, 10000, 20000, 35000, 50000, 75000, 100000, 200000, 300000, 500000, 750000, 1000000}; | ||
for (int i = 0; i < NUM_BINS; i++) { | ||
if (length <= bins[i]) { | ||
return i; | ||
} | ||
} | ||
return NUM_BINS - 1; // For reads longer than the last bin | ||
} | ||
|
||
int main(int argc, char *argv[]) { | ||
if (argc != 2) { | ||
fprintf(stderr, "Usage: %s <fastq_file>\n", argv[0]); | ||
return 1; | ||
} | ||
|
||
gzFile fp = gzopen(argv[1], "r"); | ||
if (!fp) { | ||
fprintf(stderr, "Error: Could not open file %s\n", argv[1]); | ||
return 1; | ||
} | ||
|
||
char line[MAX_LINE]; | ||
int counters[NUM_BINS] = {0}; | ||
int line_count = 0; | ||
|
||
while (gzgets(fp, line, sizeof(line))) { | ||
line_count++; | ||
if (line_count % 4 == 2) { // This is the sequence line | ||
int length = strlen(line) - 1; // Subtract 1 to remove newline | ||
int bin = get_bin(length); | ||
counters[bin]++; | ||
} | ||
} | ||
|
||
gzclose(fp); | ||
|
||
// Print results | ||
printf("Bin,Number of Reads\n"); | ||
int bins[] = {10, 100, 1000, 2500, 5000, 10000, 20000, 35000, 50000, 75000, 100000, 200000, 300000, 500000, 750000, 1000000}; | ||
for (int i = 0; i < NUM_BINS; i++) { | ||
printf("%d,%d\n", bins[i], counters[i]); | ||
} | ||
|
||
return 0; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
#!/bin/bash | ||
|
||
# Check if required arguments are provided | ||
if [ "$#" -lt 3 ]; then | ||
echo "Usage: $0 <input_file> <FORMAT> <OUTDIR>" | ||
exit 1 | ||
fi | ||
|
||
INPUT_FILE=$1 | ||
FORMAT=$2 | ||
OUTDIR=$3 | ||
|
||
# Process the input file and generate the string | ||
GENERATED_STRING=$(awk -F',' ' | ||
NR>1 { | ||
if ($1 ~ /^[0-9]+$/ && $2 ~ /^[0-9]+$/ && $2 > 0) { | ||
printf "%d*%d ", $2, $1 | ||
} | ||
} | ||
' "$INPUT_FILE" | sed 's/ $//') | ||
|
||
# Run the n50_simreads command with the generated string | ||
n50_simreads --${FORMAT} -o ${OUTDIR} $GENERATED_STRING | ||
|
||
# Print the command that was executed (for verification) | ||
echo "Executed command: n50_simreads --${FORMAT} -o ${OUTDIR} $GENERATED_STRING" | ||
|
||
# Calculate and print total number of reads | ||
TOTAL_READS=$(awk -F',' 'NR>1 && $2 ~ /^[0-9]+$/ {sum += $2} END {print sum}' "$INPUT_FILE") | ||
echo "Total number of reads: $TOTAL_READS" | ||
|
||
# Find the maximum read length | ||
MAX_LENGTH=$(awk -F',' 'NR>1 && $1 ~ /^[0-9]+$/ && $2 > 0 {max=$1} END {print max}' "$INPUT_FILE") | ||
echo "Maximum read length: $MAX_LENGTH" |