Skip to content

Commit

Permalink
Included option in order to set the number of chomosomes of the genome
Browse files Browse the repository at this point in the history
  • Loading branch information
rioro2000 committed Oct 5, 2018
1 parent 62bc110 commit 462d7e1
Show file tree
Hide file tree
Showing 11 changed files with 69 additions and 14 deletions.
3 changes: 2 additions & 1 deletion lib/c/src/aligners/bwt/bwt.c
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ void bwt_anchor_free(bwt_anchor_t *bwt_anchor) {

bwt_optarg_t *bwt_optarg_new(const size_t num_errors, const size_t num_threads, const int filter_read_mappings,
const int filter_seed_mappings, const size_t min_cal_size, const double umbral_cal_length_factor,
const int min_read_discard, const int max_inner_gap) {
const int min_read_discard, const int max_inner_gap, const int max_num_chromosomes) {
bwt_optarg_t *bwt_optarg = (bwt_optarg_t *) calloc(1, sizeof(bwt_optarg_t));

bwt_optarg->num_errors = num_errors;
Expand All @@ -214,6 +214,7 @@ bwt_optarg_t *bwt_optarg_new(const size_t num_errors, const size_t num_threads,
bwt_optarg->umbral_cal_length_factor = umbral_cal_length_factor; //Ricardo
bwt_optarg->min_read_discard = min_read_discard; //Ricardo
bwt_optarg->max_inner_gap = max_inner_gap; //Ricardo
bwt_optarg->max_num_chromosomes = max_num_chromosomes;

return bwt_optarg;
}
Expand Down
4 changes: 3 additions & 1 deletion lib/c/src/aligners/bwt/bwt.h
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,7 @@ typedef struct bwt_optarg {
int filter_seed_mappings;
size_t min_cal_size;
double umbral_cal_length_factor;
int max_num_chromosomes;
int min_read_discard;
int max_inner_gap;
} bwt_optarg_t;
Expand All @@ -211,7 +212,8 @@ bwt_optarg_t *bwt_optarg_new(const size_t num_errors,
const size_t min_cal_size,
const double umbral_cal_length_factor,
const int min_read_discard,
const int max_inner_gap);
const int max_inner_gap,
const int max_num_chromosomes);

void bwt_optarg_free(bwt_optarg_t *optarg);

Expand Down
10 changes: 10 additions & 0 deletions man/Readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@ The mandatory command line options for the mode are:
all the processing cores available in your computer platform, improving the
performance.

HPG-Methyl is preconfigured to work with human genome. In order to work with other
types of genome, it can be necessary to configure de number of chromosomes of the genome:
* `--max-num-chromosomes`: Fixes the number of chromosomes of the genome. By default, 24,
human genome number of chromosomes (22+X+Y). The maximum value allowed is 1019.

## Burrows-Wheeler Index generation

To be able to run HPG-Methyl, the index used to access the reference genome must
Expand All @@ -42,6 +47,11 @@ be stored.
* `--bs-index`: Enable this option to create a BWT index compatible with the
methylation status extraction process.

HPG-Methyl is preconfigured to work with human genome. In order to work with other
types of genome, it can be necessary to configure de number of chromosomes of the genome:
* `--max-num-chromosomes`: Fixes the number of chromosomes of the genome. By default, 24,
human genome number of chromosomes (22+X+Y). The maximum value allowed is 1019.



## Methylation status
Expand Down
Binary file modified man/manual.pdf
Binary file not shown.
4 changes: 2 additions & 2 deletions src/bs/bs_aligner.c
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,8 @@ void run_bs_aligner(genome_t *genome2, genome_t *genome1, genome_t *genome,
sw_input.genome1_p = genome1;
sw_input.genome2_p = genome2;

sw_input.valuesCT = (unsigned long long **)malloc(50 * sizeof(unsigned long long *));
sw_input.valuesGA = (unsigned long long **)malloc(50 * sizeof(unsigned long long *));
sw_input.valuesCT = (unsigned long long **)malloc(options->max_num_chromosomes * sizeof(unsigned long long *)); //ponia 50
sw_input.valuesGA = (unsigned long long **)malloc(options->max_num_chromosomes * sizeof(unsigned long long *)); //ponia 50

int gen_loads = load_encode_context(options->bwt_dirname, sw_input.valuesCT, sw_input.valuesGA);

Expand Down
11 changes: 9 additions & 2 deletions src/bs/methylation.c
Original file line number Diff line number Diff line change
Expand Up @@ -898,12 +898,17 @@ void postprocess_bs(char *query_name, char status, size_t chromosome, size_t sta

//------------------------------------------------------------------------------------

int encode_context(char* filename, char* directory) {
int encode_context(char* filename, char* directory, int max_num_chromosomes) {
printf("Init Genome Compresion\n");

FILE *f1, *f2, *f3, *f4;
unsigned long long value, value2;
size_t size[50] = {0};
size_t *size = (size_t*)malloc(max_num_chromosomes*sizeof(size_t)); //Tamaño máximo de cromosoma 24 para humanos
for (int i=0;i<max_num_chromosomes;i++)
size[i] = 0;



size_t size2;
int chromosome, i, cont, cont2;
char *line1, *line2;
Expand Down Expand Up @@ -1117,13 +1122,15 @@ int encode_context(char* filename, char* directory) {
free(line1);
line1 = strdup(line2);
}
printf("chromosomes = %2i, size = %10lu\n", chromosome, size[chromosome]);

fprintf(f4, "%i\n", chromosome);

for (i = 0; i < chromosome; i++) {
fprintf(f4, "%lu\n", size[i]);
}

free(size);
free(tmp);
free(line1);
free(line2);
Expand Down
2 changes: 1 addition & 1 deletion src/bs/methylation.h
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@ void write_bs_context(metil_file_t *metil_file, bs_context_t *bs_context, size_t

//====================================================================================

int encode_context(char* filename, char* directory);
int encode_context(char* filename, char* directory, int max_num_chromosomes);

//====================================================================================

Expand Down
10 changes: 6 additions & 4 deletions src/bwt_server_cpu.c
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ cal_t *convert_bwt_anchor_to_CAL(bwt_anchor_t *bwt_anchor, size_t read_start, si

//------------------------------------------------------------------------------------

size_t bwt_search_pair_anchors(array_list_t *list, unsigned int read_length, size_t min_cal_size, double umbral_cal_length_factor, int min_read_discard) {
size_t bwt_search_pair_anchors(array_list_t *list, unsigned int read_length, size_t min_cal_size, double umbral_cal_length_factor, int min_read_discard, int num_max_chromosomes) {
bwt_anchor_t *bwt_anchor;

int anchor_length_tmp;
Expand Down Expand Up @@ -160,7 +160,7 @@ size_t bwt_search_pair_anchors(array_list_t *list, unsigned int read_length, siz

if (!found_double_anchor && found_anchor) {
const int nstrands = 2;
const int nchromosomes = 30; //TODO: Parameter
const int nchromosomes = num_max_chromosomes; //24 for human genome

linked_list_t ***new_cals_list = (linked_list_t ***)malloc(sizeof(linked_list_t **)*nstrands);

Expand Down Expand Up @@ -545,7 +545,8 @@ int apply_bwt_bs(bwt_server_input_t* input, batch_t *batch, bwt_stage_bs_workspa
if (mapping_batch->mapping_lists[i]->size) {
bwt_search_pair_anchors(mapping_batch->mapping_lists[i], fq_read->length,
MIN_CAL_SIZE, input->bwt_optarg_p->umbral_cal_length_factor,
input->bwt_optarg_p->min_read_discard);
input->bwt_optarg_p->min_read_discard,
input->bwt_optarg_p->max_num_chromosomes);
}

if (!mapping_batch->mapping_lists[i]->size) {
Expand All @@ -556,7 +557,8 @@ int apply_bwt_bs(bwt_server_input_t* input, batch_t *batch, bwt_stage_bs_workspa
if (mapping_batch->mapping_lists2[i]->size) {
bwt_search_pair_anchors(mapping_batch->mapping_lists2[i], fq_read->length, MIN_CAL_SIZE,
input->bwt_optarg_p->umbral_cal_length_factor,
input->bwt_optarg_p->min_read_discard);
input->bwt_optarg_p->min_read_discard,
input->bwt_optarg_p->max_num_chromosomes);
}

if (!mapping_batch->mapping_lists2[i]->size) {
Expand Down
26 changes: 24 additions & 2 deletions src/hpg-aligner.c
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,27 @@ int main(int argc, char* argv[]) {
// Bisulphite index generation
printf("\nBisulphite index generation\n");

//check if reference genome file has a number of chromosomes supported
FILE *fd;
fd = fopen(options->genome_filename, "r");
if (fd == NULL) { LOG_FATAL_F("Error opening file %s", options->genome_filename); }


char ch;
int count=0;
while (((ch = fgetc(fd)) != EOF))
{
if (ch == '>')
count++;
}
fclose(fd);

if (count > options->max_num_chromosomes) {
printf("\nGenome reference File has %d chromosomes but the maximum supported is %d\n",count, options->max_num_chromosomes);
printf("Please, fix the genome reference file: %s, or increase the maximum number of chromosomes allowed\n", options->genome_filename);
exit(0);
}

/******************************************************************************
* *
* Generates the genome transform from the input and builds the index *
Expand Down Expand Up @@ -167,7 +188,7 @@ int main(int argc, char* argv[]) {
LOG_DEBUG("ACT index Done !!\n");

LOG_DEBUG("Generation of CT & GA context\n");
encode_context(options->genome_filename, options->bwt_dirname);
encode_context(options->genome_filename, options->bwt_dirname, options->max_num_chromosomes);
LOG_DEBUG("CT & GA context Done !!\n");

exit(0);
Expand Down Expand Up @@ -238,7 +259,8 @@ int main(int argc, char* argv[]) {
options->min_cal_size,
options->umbral_cal_length_factor,
options->min_read_discard,
options->max_inner_gap);
options->max_inner_gap,
options->max_num_chromosomes);

// CAL parameters
cal_optarg_t *cal_optarg = cal_optarg_new(options->min_cal_size, options->seeds_max_distance,
Expand Down
8 changes: 8 additions & 0 deletions src/options.c
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ options_t *options_new(void) {
options->umbral_cal_length_factor = DEFAULT_UMBRAL_CAL_LENGTH_FACTOR;
options->min_read_discard = DEFAULT_MIN_READ_DISCARD;
options->max_inner_gap = -1;
options->max_num_chromosomes = DEFAULT_MAX_CHROMOSOMES;

//new variables for bisulphite case in index generation
options->bs_index = 0;
Expand Down Expand Up @@ -391,6 +392,7 @@ void** argtable_options_new(void) {
argtable[47] = arg_dbl0(NULL, "umbral-cal-length-factor", NULL, "Cal is accepted like candidate if has been mapped more than cal_size / this factor");
argtable[48] = arg_int0(NULL, "min-read-discard", NULL, "Discard small anchors for reads larger or equal than this value");
argtable[49] = arg_int0(NULL, "max-inner-gap", NULL, "Recursive Burrows-Wheeler stops when inner gap is lower than this value ");
argtable[50] = arg_int0(NULL, "max-num-chromosomes", NULL, "Maximum number of chromosomes (default: 24, human genome chromosomes (22+X+Y), maximum accepted value 1019)");

argtable[NUM_OPTIONS] = arg_end(20);

Expand Down Expand Up @@ -483,6 +485,12 @@ options_t *read_CLI_options(void **argtable, options_t *options) {
//new value
if (((struct arg_int*)argtable[49])->count) { options->max_inner_gap = *(((struct arg_int*)argtable[49])->ival); }

if (((struct arg_int*)argtable[50])->count) {
options->max_num_chromosomes = *(((struct arg_int*)argtable[50])->ival);
if (options->max_num_chromosomes > DEFAULT_MAX_CHROMOSOMES_ALLOWED)
options->max_num_chromosomes = DEFAULT_MAX_CHROMOSOMES_ALLOWED;
}


return options;
}
Expand Down
5 changes: 4 additions & 1 deletion src/options.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@
// BEGIN: Ricardo
#define DEFAULT_UMBRAL_CAL_LENGTH_FACTOR 4
#define DEFAULT_MIN_READ_DISCARD 100
#define DEFAULT_MAX_CHROMOSOMES 24
#define DEFAULT_MAX_CHROMOSOMES_ALLOWED 1019
// END: Ricardo

#define DEFAULT_MIN_NUM_SEEDS_IN_CAL -1
Expand All @@ -57,7 +59,7 @@
#define DEFAULT_FILTER_READ_MAPPINGS_BS 100
#define DEFAULT_FILTER_SEED_MAPPINGS_BS 500
//=====================================================================
#define NUM_OPTIONS 50
#define NUM_OPTIONS 51

typedef struct options {
char mode[64];
Expand Down Expand Up @@ -119,6 +121,7 @@ typedef struct options {
double umbral_cal_length_factor;
int min_read_discard;
int max_inner_gap;
int max_num_chromosomes;
//RICARDO
// new variables for bisulphite case
} options_t;
Expand Down

0 comments on commit 462d7e1

Please sign in to comment.