Skip to content

Commit

Permalink
* samtools-0.1.6-18 (r493)
Browse files Browse the repository at this point in the history
 * fixed the bugs due to improperly incorporating Petr's header parser
 * a little code clean up in sam_header.c
  • Loading branch information
Heng Li committed Oct 26, 2009
1 parent 2081e53 commit 87d9ea7
Show file tree
Hide file tree
Showing 6 changed files with 56 additions and 43 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
CC= gcc
CFLAGS= -g -Wall #-O2 #-m64 #-arch ppc
CFLAGS= -g -Wall -O2 #-m64 #-arch ppc
DFLAGS= -D_FILE_OFFSET_BITS=64 -D_USE_KNETFILE -D_CURSES_LIB=1
LOBJS= bgzf.o kstring.o bam_aux.o bam.o bam_import.o sam.o bam_index.o \
bam_pileup.o bam_lpileup.o bam_md.o glf.o razf.o faidx.o knetfile.o \
Expand Down
3 changes: 2 additions & 1 deletion bam.c
Original file line number Diff line number Diff line change
Expand Up @@ -292,11 +292,12 @@ void bam_view1(const bam_header_t *header, const bam1_t *b)
free(s);
}

// FIXME: we should also check the LB tag associated with each alignment
/**
 * Look up the library of an alignment through its read group.
 *
 * On first use the header text is parsed into h->dict and an
 * RG "ID" -> "LB" table is built into h->rg2lib. Both are stored on the
 * header so later calls reuse them.
 *
 * @param h  header; h->dict and h->rg2lib are filled in lazily
 * @param b  alignment whose "RG" auxiliary tag is looked up
 * @return   whatever sam_tbl_get() yields for the read-group ID, or 0 when
 *           the alignment carries no "RG" tag
 */
const char *bam_get_library(bam_header_t *h, const bam1_t *b)
{
	const uint8_t *rg;

	if (h->dict == 0) h->dict = sam_header_parse2(h->text);
	/* Build the table only when it is missing; testing for a non-null
	 * table here would discard the cached one and rebuild it every call. */
	if (h->rg2lib == 0) h->rg2lib = sam_header2tbl(h->dict, "RG", "ID", "LB");
	rg = bam_aux_get(b, "RG");
	/* rg + 1: presumably skips the aux type byte -- confirm against bam_aux_get */
	return (rg == 0)? 0 : sam_tbl_get(h->rg2lib, (const char*)(rg + 1));
}
8 changes: 6 additions & 2 deletions bam_import.c
Original file line number Diff line number Diff line change
Expand Up @@ -179,10 +179,14 @@ int sam_header_parse(bam_header_t *h)
h->n_targets = 0; h->target_len = 0; h->target_name = 0;
if (h->l_text < 3) return 0;
if (h->dict == 0) h->dict = sam_header_parse2(h->text);
h->target_name = sam_header2list(h->dict, "SQ", "SN", &h->n_targets);
tmp = sam_header2list(h->dict, "SQ", "SN", &h->n_targets);
if (h->n_targets == 0) return 0;
h->target_name = calloc(h->n_targets, sizeof(void*));
for (i = 0; i < h->n_targets; ++i)
h->target_name[i] = strdup(tmp[i]);
free(tmp);
tmp = sam_header2list(h->dict, "SQ", "LN", &h->n_targets);
h->target_len = (uint32_t*)calloc(h->n_targets, 4);
h->target_len = calloc(h->n_targets, 4);
for (i = 0; i < h->n_targets; ++i)
h->target_len[i] = atoi(tmp[i]);
free(tmp);
Expand Down
2 changes: 1 addition & 1 deletion bamtk.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#endif

#ifndef PACKAGE_VERSION
#define PACKAGE_VERSION "0.1.6-17 (r484)"
#define PACKAGE_VERSION "0.1.6-18 (r493)"
#endif

int bam_taf2baf(int argc, char *argv[]);
Expand Down
2 changes: 1 addition & 1 deletion sam.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ bam_header_t *bam_header_dup(const bam_header_t *h0)
int i;
h = bam_header_init();
*h = *h0;
h->hash = 0;
h->hash = h->dict = h->rg2lib = 0;
h->text = (char*)calloc(h->l_text + 1, 1);
memcpy(h->text, h0->text, h->l_text);
h->target_len = (uint32_t*)calloc(h->n_targets, 4);
Expand Down
82 changes: 45 additions & 37 deletions sam_header.c
Original file line number Diff line number Diff line change
Expand Up @@ -50,24 +50,15 @@ const char **required_tags[] = {r_hd_tags,r_sq_tags,r_rg_tags,r_pg_tags,NULL,NUL
const char **unique_tags[] = {NULL, u_sq_tags,u_rg_tags,NULL,NULL,NULL};


/*
 * Write a printf-style diagnostic to stderr and return to the caller.
 * File-local (static), like the other helpers in this file.
 */
static void debug(const char *format, ...)
{
	va_list args;

	va_start(args, format);
	vfprintf(stderr, format, args);
	va_end(args);
}

/*
 * Write a printf-style message to stderr, then terminate the process
 * with exit status -1. This function does not return.
 */
void error(const char *format, ...)
{
	va_list args;

	va_start(args, format);
	vfprintf(stderr, format, args);
	va_end(args);

	exit(-1);
}

list_t *list_append(list_t *root, void *data)
static list_t *list_append(list_t *root, void *data)
{
list_t *l = root;
while (l && l->next)
Expand All @@ -87,7 +78,7 @@ list_t *list_append(list_t *root, void *data)
return root;
}

void list_free(list_t *root)
static void list_free(list_t *root)
{
list_t *l = root;
while (root)
Expand All @@ -101,7 +92,7 @@ void list_free(list_t *root)


// Look for a tag "XY" in a predefined const char *[] array.
int tag_exists(const char *tag, const char **tags)
static int tag_exists(const char *tag, const char **tags)
{
int itag=0;
if ( !tags ) return -1;
Expand All @@ -118,7 +109,7 @@ int tag_exists(const char *tag, const char **tags)
// Mimics the behaviour of getline, except it returns pointer to the next chunk of the text
// or NULL if everything has been read. The lineptr should be freed by the caller. The
// newline character is stripped.
const char *nextline(char **lineptr, size_t *n, const char *text)
static const char *nextline(char **lineptr, size_t *n, const char *text)
{
int len;
const char *to = text;
Expand Down Expand Up @@ -147,8 +138,10 @@ const char *nextline(char **lineptr, size_t *n, const char *text)
*lineptr = realloc(*lineptr, len);
*n = len;
}
if ( !*lineptr )
error("FIXME\n");
if ( !*lineptr ) {
debug("[nextline] Insufficient memory!\n");
return 0;
}

memcpy(*lineptr,text,len);
(*lineptr)[len-1] = 0;
Expand All @@ -158,7 +151,7 @@ const char *nextline(char **lineptr, size_t *n, const char *text)

// name points to "XY", value_from points to the first character of the value string and
// value_to points to the last character of the value string.
HeaderTag *new_tag(const char *name, const char *value_from, const char *value_to)
static HeaderTag *new_tag(const char *name, const char *value_from, const char *value_to)
{
HeaderTag *tag = malloc(sizeof(HeaderTag));
int len = value_to-value_from+1;
Expand All @@ -171,7 +164,7 @@ HeaderTag *new_tag(const char *name, const char *value_from, const char *value_t
return tag;
}

HeaderTag *header_line_has_tag(HeaderLine *hline, const char *key)
static HeaderTag *header_line_has_tag(HeaderLine *hline, const char *key)
{
list_t *tags = hline->tags;
while (tags)
Expand All @@ -189,15 +182,18 @@ HeaderTag *header_line_has_tag(HeaderLine *hline, const char *key)
// 1 .. all tags identical -> no need to merge, drop one
// 2 .. the unique tags match and there are some conflicting tags (same tag, different value) -> error, cannot be merged nor duplicated
// 3 .. there are some missing complementary tags and no unique conflict -> can be merged into a single line
int sam_header_compare_lines(HeaderLine *hline1, HeaderLine *hline2)
static int sam_header_compare_lines(HeaderLine *hline1, HeaderLine *hline2)
{
HeaderTag *t1, *t2;

if ( hline1->type[0]!=hline2->type[0] || hline1->type[1]!=hline2->type[1] )
return 0;

int itype = tag_exists(hline1->type,types);
if ( itype==-1 ) error("[sam_header_compare_lines] Unknown type [%c%c]\n", hline1->type[0],hline1->type[1]);
if ( itype==-1 ) {
debug("[sam_header_compare_lines] Unknown type [%c%c]\n", hline1->type[0],hline1->type[1]);
return -1; // FIXME (lh3): error; I do not know how this will be handled in Petr's code
}

if ( unique_tags[itype] )
{
Expand Down Expand Up @@ -261,7 +257,7 @@ int sam_header_compare_lines(HeaderLine *hline1, HeaderLine *hline2)
}


HeaderLine *sam_header_line_clone(const HeaderLine *hline)
static HeaderLine *sam_header_line_clone(const HeaderLine *hline)
{
list_t *tags;
HeaderLine *out = malloc(sizeof(HeaderLine));
Expand All @@ -285,7 +281,7 @@ HeaderLine *sam_header_line_clone(const HeaderLine *hline)
return out;
}

int sam_header_line_merge_with(HeaderLine *out_hline, const HeaderLine *tmpl_hline)
static int sam_header_line_merge_with(HeaderLine *out_hline, const HeaderLine *tmpl_hline)
{
list_t *tmpl_tags;

Expand All @@ -311,18 +307,24 @@ int sam_header_line_merge_with(HeaderLine *out_hline, const HeaderLine *tmpl_hli
}


HeaderLine *sam_header_line_parse(const char *headerLine)
static HeaderLine *sam_header_line_parse(const char *headerLine)
{
HeaderLine *hline;
HeaderTag *tag;
const char *from, *to;
from = headerLine;

if ( *from != '@' ) error("[sam_header_line_parse] expected '@', got [%s]\n", headerLine);
if ( *from != '@' ) {
debug("[sam_header_line_parse] expected '@', got [%s]\n", headerLine);
return 0;
}
to = ++from;

while (*to && *to!='\t') to++;
if ( to-from != 2 ) error("[sam_header_line_parse] expected '@XY', got [%s]\n", headerLine);
if ( to-from != 2 ) {
debug("[sam_header_line_parse] expected '@XY', got [%s]\n", headerLine);
return 0;
}

hline = malloc(sizeof(HeaderLine));
hline->type[0] = from[0];
Expand All @@ -333,8 +335,10 @@ HeaderLine *sam_header_line_parse(const char *headerLine)

from = to;
while (*to && *to=='\t') to++;
if ( to-from != 1 )
error("[sam_header_line_parse] multiple tabs on line [%s] (%d)\n", headerLine,(int)(to-from));
if ( to-from != 1 ) {
debug("[sam_header_line_parse] multiple tabs on line [%s] (%d)\n", headerLine,(int)(to-from));
return 0;
}
from = to;
while (*from)
{
Expand All @@ -351,8 +355,10 @@ HeaderLine *sam_header_line_parse(const char *headerLine)

from = to;
while (*to && *to=='\t') to++;
if ( *to && to-from != 1 )
error("[sam_header_line_parse] multiple tabs on line [%s] (%d)\n", headerLine,(int)(to-from));
if ( *to && to-from != 1 ) {
debug("[sam_header_line_parse] multiple tabs on line [%s] (%d)\n", headerLine,(int)(to-from));
return 0;
}

from = to;
}
Expand All @@ -361,7 +367,7 @@ HeaderLine *sam_header_line_parse(const char *headerLine)


// Must be of an existing type, all tags must be recognised and all required tags must be present
int sam_header_line_validate(HeaderLine *hline)
static int sam_header_line_validate(HeaderLine *hline)
{
list_t *tags;
HeaderTag *tag;
Expand Down Expand Up @@ -405,7 +411,7 @@ int sam_header_line_validate(HeaderLine *hline)
}


void print_header_line(FILE *fp, HeaderLine *hline)
static void print_header_line(FILE *fp, HeaderLine *hline)
{
list_t *tags = hline->tags;
HeaderTag *tag;
Expand All @@ -426,7 +432,7 @@ void print_header_line(FILE *fp, HeaderLine *hline)
}


void sam_header_line_free(HeaderLine *hline)
static void sam_header_line_free(HeaderLine *hline)
{
list_t *tags = hline->tags;
while (tags)
Expand Down Expand Up @@ -526,18 +532,18 @@ void *sam_header_parse2(const char *headerText)
size_t nbuf = 0;

if ( !headerText )
error("FIXME");
return 0;

text = headerText;
while ( (text=nextline(&buf, &nbuf, text)) )
{
hline = sam_header_line_parse(buf);
if ( sam_header_line_validate(hline) )
if ( hline && sam_header_line_validate(hline) )
hlines = list_append(hlines, hline);
else
{
sam_header_line_free(hline);
sam_header_free(hlines);
if (hline) sam_header_line_free(hline);
sam_header_free(hlines);
if ( buf ) free(buf);
return NULL;
}
Expand All @@ -555,6 +561,7 @@ void *sam_header2tbl(const void *_dict, char type[2], char key_tag[2], char valu
khiter_t k;
int ret;

if (_dict == 0) return tbl; // return an empty (not null) hash table
while (l)
{
HeaderLine *hline = l->data;
Expand Down Expand Up @@ -672,7 +679,8 @@ void *sam_header_merge(int n, const void **_dicts)
{
print_header_line(stderr,tmpl_hlines->data);
print_header_line(stderr,out_hlines->data);
error("Conflicting lines, cannot merge the headers.\n");
debug("Conflicting lines, cannot merge the headers.\n");
return 0;
}
if ( status==3 )
sam_header_line_merge_with(out_hlines->data, tmpl_hlines->data);
Expand Down

0 comments on commit 87d9ea7

Please sign in to comment.