-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1 from sfu-ireceptor/AIRR-compliance
Airr compliance
- Loading branch information
Showing
7 changed files
with
2,689 additions
and
5,948 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,211 @@ | ||
# | ||
# Schema definitions for AIRR Formats rearrangement objects | ||
# | ||
|
||
# The Formats rearrangements | ||
# Note: This is an extension of the MiAIRR rearrangements, and assumes that | ||
# the MiAIRR has defined the MiAIRR rearrangement objects. | ||
# required: | ||
# - sequence_id | ||
# - sequence | ||
# - sample_id | ||
# - constant | ||
# - functional | ||
# - rev_comp | ||
# - c_call | ||
# - v_score | ||
# - d_score | ||
# - j_score | ||
# - c_score | ||
# - v_cigar | ||
# - d_cigar | ||
# - j_cigar | ||
# - c_cigar | ||
|
||
|
||
Formats_Rearrangements: | ||
discriminator: AIRRFormats | ||
type: object | ||
properties: | ||
sequence_id: | ||
type: string | ||
description: Read/sequence identifier | ||
sequence: | ||
type: string | ||
description: Nucleotide sequence (e.g., the "read" sequence; revcomp'd if | ||
necessary) | ||
sample_id: | ||
type: string | ||
description: The biological sample this read derives from (e.g., from BioSample | ||
database) | ||
constant: | ||
type: string | ||
description: Constant region gene (e.g., IGHG4, IGHA2, IGHE, TRBC) | ||
functional: | ||
type: boolean | ||
description: VDJ sequence is predicted to be functional | ||
rev_comp: | ||
type: boolean | ||
description: Sequence is reverse complemented | ||
c_call: | ||
type: string | ||
description: C gene assignment | ||
v_score: | ||
type: number | ||
description: V alignment score | ||
d_score: | ||
type: number | ||
description: D alignment score | ||
j_score: | ||
type: number | ||
description: J alignment score | ||
c_score: | ||
type: number | ||
description: C alignment score | ||
v_cigar: | ||
type: string | ||
description: V alignment CIGAR string | ||
d_cigar: | ||
type: string | ||
description: D alignment CIGAR string | ||
j_cigar: | ||
type: string | ||
description: J alignment CIGAR string | ||
c_cigar: | ||
type: string | ||
description: C alignment CIGAR string | ||
v_evalue: | ||
type: number | ||
description: V alignment E-value (when applicable) | ||
d_evalue: | ||
type: number | ||
description: D alignment E-value (when applicable) | ||
j_evalue: | ||
type: number | ||
description: J alignment E-value (when applicable) | ||
v_identity: | ||
type: number | ||
description: V alignment identity | ||
d_identity: | ||
type: number | ||
description: D alignment identity | ||
j_identity: | ||
type: number | ||
description: J alignment identity | ||
vdj_score: | ||
type: number | ||
description: Score for aligners that consider the full sequence as a whole | ||
vdj_evalue: | ||
type: number | ||
description: E-value for aligners that consider the full sequence as a whole | ||
vdj_identity: | ||
type: number | ||
description: Identity for aligners that consider the full sequence as a whole | ||
vdj_cigar: | ||
type: string | ||
description: VDJ alignment CIGAR string | ||
v_start: | ||
type: integer | ||
description: Position of first V nucleotide in 'sequence' field | ||
v_germ_start: | ||
type: integer | ||
description: Position of 'v_start' field in IMGT numbered germline V(D)J sequence | ||
fwr1_start: | ||
type: integer | ||
description: FWR1 start coordinate in sequence (transferred from germline) | ||
fwr1_end: | ||
type: integer | ||
description: FWR1 end coordinate in sequence (transferred from germline) | ||
cdr1_start: | ||
type: integer | ||
description: CDR1 start coord in sequence (transferred from germline) | ||
cdr1_end: | ||
type: integer | ||
description: CDR1 end coord in sequence (transferred from germline) | ||
fwr2_start: | ||
type: integer | ||
description: FWR2 start coord in sequence (transferred from germline) | ||
fwr2_end: | ||
type: integer | ||
description: FWR2 end coord in sequence (transferred from germline) | ||
cdr2_start: | ||
type: integer | ||
description: CDR2 start coord in sequence (transferred from germline) | ||
cdr2_end: | ||
type: integer | ||
description: CDR2 end coord in sequence (transferred from germline) | ||
fwr3_start: | ||
type: integer | ||
description: FWR3 start coord in sequence (transferred from germline) | ||
fwr3_end: | ||
type: integer | ||
description: FWR3 end coord in sequence (transferred from germline) | ||
cdr3_start: | ||
type: integer | ||
description: CDR3 start coord in sequence (transferred from germline) | ||
cdr3_end: | ||
type: integer | ||
description: CDR3 end coord in sequence (transferred from germline) | ||
fwr4_start: | ||
type: integer | ||
description: FWR4 start coord in sequence (transferred from germline) | ||
fwr4_end: | ||
type: integer | ||
description: FWR4 end coord in sequence (transferred from germline) | ||
v_end: | ||
type: integer | ||
description: End coordinate of the V segment (generally inside the CDR3) | ||
d_start: | ||
type: integer | ||
description: Start coordinate of the D segment | ||
d_germ_start: | ||
type: integer | ||
description: Position of 'd_start' field in IMGT numbered germline V(D)J sequence | ||
d_end: | ||
type: integer | ||
description: End coordinate of the D segment | ||
j_start: | ||
type: integer | ||
description: Start coordinate of the J segment (generally inside the CDR3) | ||
j_germ_start: | ||
type: integer | ||
description: Position of 'j_start' field in IMGT numbered germline V(D)J sequence | ||
j_end: | ||
type: integer | ||
description: End coordinate of the J segment | ||
junction_length: | ||
type: integer | ||
description: Number of junction nucleotides in sequence_vdj | ||
np1_length: | ||
type: integer | ||
description: Number of nucleotides between sample V and D sequences | ||
np2_length: | ||
type: integer | ||
description: Number of nucleotides between sample D and J sequences | ||
n1_length: | ||
type: integer | ||
description: Nucleotides 5' of the D-segment | ||
n2_length: | ||
type: integer | ||
description: Nucleotides 3' of the D-segment | ||
p3v_length: | ||
type: integer | ||
description: Palindromic nucleotides 3' of the V-segment | ||
p5d_length: | ||
type: integer | ||
description: Palindromic nucleotides 5' of the D-segment | ||
p3d_length: | ||
type: integer | ||
description: Palindromic nucleotides 3' of the D-segment | ||
p5j_length: | ||
type: integer | ||
description: Palindromic nucleotides 5' of the J-segment | ||
duplicate_count: | ||
type: integer | ||
description: Number of duplicate reads for this sequence | ||
consensus_count: | ||
type: integer | ||
description: Number of reads contributing to the consensus for this sequence | ||
clone: | ||
type: string | ||
description: Clone assignment for this sequence |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
# | ||
# Schema definitions for AIRR minimal standards rearrangement objects | ||
# | ||
|
||
# The MiAIRR rearrangements | ||
|
||
MiAIRR_Rearrangements: | ||
discriminator: MiAIRR | ||
type: object | ||
properties: | ||
germline_database: | ||
type: string | ||
description: > | ||
Source of germline V(D)J segments, with version number or | ||
date accessed (e.g. IMGT/V-QUEST Release 201736-4 (7 September 2017)) (AIRR) | ||
cell_index: | ||
type: integer | ||
description: | | ||
Cell Index (UID linking sequences from the same cell) (AIRR) | ||
v_allele: | ||
type: string | ||
description: | | ||
V gene with allele (e.g. IGHV4-59*01) (AIRR) | ||
d_allele: | ||
type: string | ||
description: | | ||
D gene with allele (e.g. IGHD3-10*01) (AIRR) | ||
j_allele: | ||
type: string | ||
description: | | ||
J gene with allele (e.g. IGHJ4*02) (AIRR) | ||
c_allele: | ||
type: string | ||
description: | | ||
C region with allele (AIRR) | ||
junction_nt: | ||
type: string | ||
description: > | ||
JUNCTION nucleotide sequence (Sequence of the CDR3 | ||
region (nucleotide)) (AIRR) | ||
junction_aa: | ||
type: string | ||
description: > | ||
JUNCTION amino acid sequence (Sequence of the CDR3 | ||
region (amino acid)) (AIRR) | ||
duplicate_count: | ||
type: integer | ||
description: | | ||
Number of times the sequence has been duplicated in a file (AIRR) | ||
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.