Skip to content

Commit

Permalink
Notifying all kind of errors in FILTER metadata entries and all meta …
Browse files Browse the repository at this point in the history
…ID subfields. #18
  • Loading branch information
Cristina Yenyxe Gonzalez Garcia committed Dec 5, 2014
1 parent d5f62e8 commit 825c16b
Show file tree
Hide file tree
Showing 3 changed files with 4,141 additions and 3,610 deletions.
46 changes: 30 additions & 16 deletions cpp/src/bioformats/vcf/vcf_v41.ragel
Original file line number Diff line number Diff line change
Expand Up @@ -207,12 +207,14 @@

########## Incorrect fields actions ##########

# Fileformat line
# Error action for the fileformat line: reports the problem through
# ErrorPolicy::handle_fileformat_section_error and then resumes parsing at
# meta_section_skip (defined outside this excerpt; presumably it skips the
# rest of the broken line -- confirm against its definition).
action fileformat_error {
ErrorPolicy::handle_fileformat_section_error(*this,
"Fileformat is not a sequence of alphanumeric and/or punctuation characters");
// fhold leaves the offending character unconsumed so that the machine entered via fgoto processes it
fhold; fgoto meta_section_skip;
}

# ALT metadata
action meta_alt_err {
ErrorPolicy::handle_fileformat_section_error(*this, "Error in ALT metadata");
fhold; fgoto meta_section_skip;
Expand All @@ -228,31 +230,42 @@
fhold; fgoto meta_section_skip;
}

# Error action for an ALT metadata entry whose description is malformed:
# reports the message below and resumes parsing at meta_section_skip.
# NOTE(review): the error is reported through handle_fileformat_section_error
# although it concerns ALT metadata -- confirm this is the intended ErrorPolicy hook.
action meta_alt_desc_err {
ErrorPolicy::handle_fileformat_section_error(*this, "ALT metadata description format is not correct");
// fhold leaves the offending character unconsumed so that the machine entered via fgoto processes it
fhold; fgoto meta_section_skip;
}

# FILTER metadata
# Catch-all error action for "##FILTER=<...>" entries: reports a generic
# FILTER error and resumes parsing at meta_section_skip.
# NOTE(review): the error is reported through handle_fileformat_section_error
# although it concerns FILTER metadata -- confirm this is the intended ErrorPolicy hook.
action meta_filter_err {
ErrorPolicy::handle_fileformat_section_error(*this, "Error in FILTER metadata");
// fhold leaves the offending character unconsumed so that the machine entered via fgoto processes it
fhold; fgoto meta_section_skip;
}

# FORMAT metadata
# Catch-all error action for "##FORMAT=<...>" entries: reports a generic
# FORMAT error and resumes parsing at meta_section_skip.
# NOTE(review): the error is reported through handle_fileformat_section_error
# although it concerns FORMAT metadata -- confirm this is the intended ErrorPolicy hook.
action meta_format_err {
ErrorPolicy::handle_fileformat_section_error(*this, "Error in FORMAT metadata");
// fhold leaves the offending character unconsumed so that the machine entered via fgoto processes it
fhold; fgoto meta_section_skip;
}

# INFO metadata
# Catch-all error action for INFO metadata entries: reports a generic INFO
# error and resumes parsing at meta_section_skip.
# NOTE(review): the error is reported through handle_fileformat_section_error
# although it concerns INFO metadata -- confirm this is the intended ErrorPolicy hook.
action meta_info_err {
ErrorPolicy::handle_fileformat_section_error(*this, "Error in INFO metadata");
// fhold leaves the offending character unconsumed so that the machine entered via fgoto processes it
fhold; fgoto meta_section_skip;
}

# Metadata generic errors (do not apply to a specific type)
# Error action for metadata entries with a free-form key (the "##" meta_key "=<...>"
# alternative of meta_entry): reports a generic metadata error and resumes
# parsing at meta_section_skip.
# NOTE(review): the error is reported through handle_fileformat_section_error
# although it concerns a metadata entry -- confirm this is the intended ErrorPolicy hook.
action meta_generic_err {
ErrorPolicy::handle_fileformat_section_error(*this, "Error in metadata");
// fhold leaves the offending character unconsumed so that the machine entered via fgoto processes it
fhold; fgoto meta_section_skip;
}

# Error action shared by the "ID=" subfield of several metadata types (attached
# with $err to the identifier rule): reports an invalid ID and resumes parsing
# at meta_section_skip.
# NOTE(review): the message only mentions disallowed characters, but the
# identifier rule also rejects IDs made up solely of dots, underscores or
# dashes -- confirm the wording covers that case.
action meta_id_err {
ErrorPolicy::handle_fileformat_section_error(*this, "Metadata ID contains a character different from alphanumeric, dot, underscore and dash");
// fhold leaves the offending character unconsumed so that the machine entered via fgoto processes it
fhold; fgoto meta_section_skip;
}

# Error action shared by the quoted Description subfield of metadata entries
# (attached with $err): reports an invalid description string and resumes
# parsing at meta_section_skip.
action meta_desc_err {
ErrorPolicy::handle_fileformat_section_error(*this, "Metadata description string is not valid");
// fhold leaves the offending character unconsumed so that the machine entered via fgoto processes it
fhold; fgoto meta_section_skip;
}

# Records

action chrom_error {
printf("Line %zu: Error in 'chrom' field\n", n_lines);
fhold; fgoto body_section_skip;
Expand Down Expand Up @@ -316,8 +329,10 @@
# Unsigned integer: one or more decimal digits
int_number = (digit)+ ;
# Bases are case-insensitive
bases = ("A" | "C" | "G" | "T" | "N" | "a" | "c" | "t" | "g" | "n" )+ ;
# Identifiers may contain some symbols, but not only those:
# the subtraction below rejects strings made up solely of dots, solely of
# underscores or solely of dashes. Mixed symbol-only strings such as ".-" still match.
# TODO Could it accept more symbols? Comma won't be, for sure
identifier = (alnum | "." | "_" | "-" )+ - ((".")+ | ("_")+ | ("-")+) ;


# File format must be a sequence of alphanumeric and/or punctuation characters like "VCFv4.1"
# (whitespace is therefore not accepted). $err attaches fileformat_error as the
# error action of every state of this machine.
fileformat_name = (alnum | punct)+ $err(fileformat_error);
Expand All @@ -326,39 +341,38 @@
# Meta-data
meta_key = (alnum | "_" )+ - ("ALT" | "FILTER" | "FORMAT" | "INFO" | "assembly" | "contig" | "SAMPLE" | "PEDIGREE" | "pedigreeDB");
meta_field_value= (print - "=")+ ;
meta_field_desc = (print)+ ;
meta_field_desc = (print - "\"")+ ;
meta_field = ( meta_key "=" meta_field_value ) >token_begin %meta_field_value_end ;

meta_alt = "ID=" (
("DEL" | "INS" | "DUP" | "INV" | "CNV") $err(meta_alt_id_prefix_err)
( ":" meta_field_value)? $err(meta_alt_id_suffix_err)
)
",Description=\"" meta_field_desc "\"" $err(meta_alt_desc_err) ;
",Description=\"" meta_field_desc "\"" $err(meta_desc_err) ;
meta_assembly = (print - ("\n" | "\t"))+ ; # URL
meta_contig = "ID=" identifier
meta_contig = "ID=" identifier $err(meta_id_err)
",URL=" meta_field_value ;
meta_filter = "ID=" identifier
",Description=\"" meta_field_desc "\"" ;
meta_format = "ID=" identifier
meta_filter = "ID=" identifier $err(meta_id_err)
",Description=\"" meta_field_desc "\"" $err(meta_desc_err) ;
meta_format = "ID=" identifier $err(meta_id_err)
",Number=" ( (digit)+ | "A" | "R" | "G" | "." )
",Type=" ( "Integer" | "Float" | "Character" | "String" )
",Description=\"" meta_field_desc "\"" ;
meta_info = "ID=" identifier
meta_info = "ID=" identifier $err(meta_id_err)
",Number=" ( (digit)+ | "A" | "R" | "G" | "." )
",Type=" ( "Integer" | "Float" | "Flag" | "Character" | "String" )
",Description=\"" meta_field_desc "\""
(",Source=\"" meta_field_value "\"")?
(",Version=\"" meta_field_value "\"")? ;
meta_pedigree = (identifier "=" identifier)+ ;
meta_pedigreeDB = (print - ("\n" | "\t"))+ ; # URL
meta_sample = "ID=" identifier
meta_sample = "ID=" identifier $err(meta_id_err)
",Genomes=" identifier ("," identifier)*
",Mixture=" identifier ("," identifier)*
",Descriptions=\"" meta_field_desc "\"" (",\"" meta_field_desc "\"")* ;

meta_entry = (
("##"
meta_key >token_begin %meta_id_end "=<" meta_field ("," meta_field)* ">") %(meta_generic_read) $err(meta_generic_err) |
("##" meta_key >token_begin %meta_id_end "=<" meta_field ("," meta_field)* ">") %(meta_generic_read) $err(meta_generic_err) |
("##ALT=<" meta_alt ">" ) %(meta_alt_read) $err(meta_alt_err) |
("##FILTER=<" meta_filter ">" ) %(meta_filter_read) $err(meta_filter_err) |
("##FORMAT=<" meta_format ">" ) %(meta_format_read) $err(meta_format_err) |
Expand Down
Loading

0 comments on commit 825c16b

Please sign in to comment.