Skip to content

Commit

Permalink
* samtools-0.1.4-8 (r338)
Browse files Browse the repository at this point in the history
 * parse the @rg header lines and allow to choose library at the "samtools view"
   command line
  • Loading branch information
Heng Li committed Jun 12, 2009
1 parent 431e646 commit 78de989
Show file tree
Hide file tree
Showing 8 changed files with 158 additions and 13 deletions.
1 change: 1 addition & 0 deletions bam.c
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ void bam_header_destroy(bam_header_t *header)
}
free(header->text);
#ifndef BAM_NO_HASH
if (header->rg2lib) bam_strmap_destroy(header->rg2lib);
bam_destroy_header_hash(header);
#endif
free(header);
Expand Down
11 changes: 9 additions & 2 deletions bam.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ typedef struct {
int32_t n_targets;
char **target_name;
uint32_t *target_len;
void *hash;
void *hash, *rg2lib;
int l_text;
char *text;
} bam_header_t;
Expand Down Expand Up @@ -317,9 +317,16 @@ extern "C" {

bam_header_t *sam_header_read(tamFile fp);
int sam_header_parse(bam_header_t *h);
int sam_header_parse_rg(bam_header_t *h);

#define sam_write1(header, b) bam_view1(header, b)

int bam_strmap_put(void *strmap, const char *rg, const char *lib);
const char *bam_strmap_get(const void *strmap, const char *rg);
void *bam_strmap_dup(const void*);
void *bam_strmap_init();
void bam_strmap_destroy(void *strmap);

/*!
@abstract Initialize a header structure.
@return the pointer to the header structure
Expand Down Expand Up @@ -593,7 +600,7 @@ extern "C" {

void bam_aux_append(bam1_t *b, const char tag[2], char type, int len, uint8_t *data);
// uint8_t *bam_aux_get_core(bam1_t *b, const char tag[2]); // an alias of bam_aux_get()
uint8_t *bam_aux_get(bam1_t *b, const char tag[2]);
uint8_t *bam_aux_get(const bam1_t *b, const char tag[2]);
int32_t bam_aux2i(const uint8_t *s);
float bam_aux2f(const uint8_t *s);
double bam_aux2d(const uint8_t *s);
Expand Down
68 changes: 67 additions & 1 deletion bam_aux.c
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
#include <ctype.h>
#include "bam.h"
#include "khash.h"
typedef char *str_p;
KHASH_MAP_INIT_STR(s, int)
KHASH_MAP_INIT_STR(r2l, str_p)

void bam_aux_append(bam1_t *b, const char tag[2], char type, int len, uint8_t *data)
{
Expand All @@ -23,7 +25,7 @@ uint8_t *bam_aux_get_core(bam1_t *b, const char tag[2])
return bam_aux_get(b, tag);
}
*/
uint8_t *bam_aux_get(bam1_t *b, const char tag[2])
uint8_t *bam_aux_get(const bam1_t *b, const char tag[2])
{
uint8_t *s;
int y = tag[0]<<8 | tag[1];
Expand Down Expand Up @@ -158,6 +160,70 @@ char *bam_aux2Z(const uint8_t *s)
else return 0;
}

/******************
* rg2lib related *
******************/

int bam_strmap_put(void *rg2lib, const char *rg, const char *lib)
{
int ret;
khint_t k;
khash_t(r2l) *h = (khash_t(r2l)*)rg2lib;
char *key = strdup(rg);
k = kh_put(r2l, h, key, &ret);
if (ret) kh_val(h, k) = strdup(lib);
else {
fprintf(stderr, "[bam_rg2lib_put] duplicated @RG ID: %s\n", rg);
free(key);
}
return 0;
}

const char *bam_strmap_get(const void *rg2lib, const char *rg)
{
const khash_t(r2l) *h = (const khash_t(r2l)*)rg2lib;
khint_t k;
k = kh_get(r2l, h, rg);
if (k != kh_end(h)) return (const char*)kh_val(h, k);
else return 0;
}

void *bam_strmap_dup(const void *rg2lib)
{
const khash_t(r2l) *h = (const khash_t(r2l)*)rg2lib;
khash_t(r2l) *g = kh_init(r2l);
khint_t k, l;
int ret;
for (k = kh_begin(h); k < kh_end(h); ++k) {
if (kh_exist(h, k)) {
char *key = strdup(kh_key(h, k));
l = kh_put(r2l, g, key, &ret);
kh_val(g, l) = strdup(kh_val(h, k));
}
}
return g;
}

void *bam_strmap_init()
{
return (void*)kh_init(r2l);
}

void bam_strmap_destroy(void *rg2lib)
{
khash_t(r2l) *h = (khash_t(r2l)*)rg2lib;
khint_t k;
if (h == 0) return;
for (k = kh_begin(h); k < kh_end(h); ++k) {
if (kh_exist(h, k)) {
free((char*)kh_key(h, k)); free(kh_val(h, k));
}
}
kh_destroy(r2l, h);
}

/*** The following routines were implemented by Nils Homer for color-space support in tview ***/

char bam_aux_getCSi(bam1_t *b, int i)
{
uint8_t *c = bam_aux_get(b, "CS");
Expand Down
51 changes: 51 additions & 0 deletions bam_import.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include <stdlib.h>
#include <unistd.h>
#include <assert.h>
#include "kstring.h"
#include "bam.h"
#include "kseq.h"
#include "khash.h"
Expand Down Expand Up @@ -146,6 +147,55 @@ static inline void append_text(bam_header_t *header, kstring_t *str)
header->text[header->l_text] = 0;
}

int sam_header_parse_rg(bam_header_t *h)
{
kstring_t *rgid, *rglib;
char *p, *q, *s, *r;
int n = 0;

bam_strmap_destroy(h->rg2lib);
// parse @RG lines
h->rg2lib = bam_strmap_init();
rgid = calloc(1, sizeof(kstring_t));
rglib = calloc(1, sizeof(kstring_t));
s = h->text;
while ((s = strstr(s, "@RG")) != 0) {
if (rgid->l && rglib->l) {
bam_strmap_put(h->rg2lib, rgid->s, rglib->s);
++n;
}
rgid->l = rglib->l = 0;
s += 3;
r = s;
if ((p = strstr(s, "ID:")) != 0) {
q = p + 3;
for (p = q; *p && *p != '\t' && *p != '\r' && *p != '\n'; ++p);
kputsn(q, p - q, rgid);
} else {
fprintf(stderr, "[bam_header_parse] missing ID tag in @RG lines.\n");
break;
}
if (r < p) r = p;
if ((p = strstr(s, "LB:")) != 0) {
q = p + 3;
for (p = q; *p && *p != '\t' && *p != '\r' && *p != '\n'; ++p);
kputsn(q, p - q, rglib);
} else {
fprintf(stderr, "[bam_header_parse] missing LB tag in @RG lines.\n");
break;
}
if (r < p) r = p;
s = r + 3;
}
if (rgid->l && rglib->l) {
bam_strmap_put(h->rg2lib, rgid->s, rglib->s);
++n;
}
free(rgid->s); free(rgid);
free(rglib->s); free(rglib);
return n;
}

int sam_header_parse(bam_header_t *h)
{
int i;
Expand Down Expand Up @@ -183,6 +233,7 @@ int sam_header_parse(bam_header_t *h)
s = r + 3;
++i;
}
sam_header_parse_rg(h);
return h->n_targets;

header_err_ret:
Expand Down
2 changes: 1 addition & 1 deletion bamtk.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#include "bam.h"

#ifndef PACKAGE_VERSION
#define PACKAGE_VERSION "0.1.4-7 (r337)"
#define PACKAGE_VERSION "0.1.4-8 (r338)"
#endif

int bam_taf2baf(int argc, char *argv[]);
Expand Down
11 changes: 8 additions & 3 deletions kstring.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,19 +19,24 @@ typedef struct __kstring_t {
int ksprintf(kstring_t *s, const char *fmt, ...);
int ksplit_core(char *s, int delimiter, int *_max, int **_offsets);

static inline int kputs(const char *p, kstring_t *s)
static inline int kputsn(const char *p, int l, kstring_t *s)
{
int l = strlen(p);
if (s->l + l + 1 >= s->m) {
s->m = s->l + l + 2;
kroundup32(s->m);
s->s = (char*)realloc(s->s, s->m);
}
strcpy(s->s + s->l, p);
strncpy(s->s + s->l, p, l);
s->l += l;
s->s[s->l] = 0;
return l;
}

static inline int kputs(const char *p, kstring_t *s)
{
return kputsn(p, strlen(p), s);
}

static inline int kputc(int c, kstring_t *s)
{
if (s->l + 1 >= s->m) {
Expand Down
2 changes: 2 additions & 0 deletions sam.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ bam_header_t *bam_header_dup(const bam_header_t *h0)
h->target_len[i] = h0->target_len[i];
h->target_name[i] = strdup(h0->target_name[i]);
}
if (h0->rg2lib) h->rg2lib = bam_strmap_dup(h0->rg2lib);
return h;
}

Expand Down Expand Up @@ -46,6 +47,7 @@ samfile_t *samopen(const char *fn, const char *mode, const void *aux)
fprintf(stderr, "[samopen] no @SQ lines in the header.\n");
} else fprintf(stderr, "[samopen] SAM header is present: %d sequences.\n", fp->header->n_targets);
}
sam_header_parse_rg(fp->header);
} else if (mode[0] == 'w') { // write
fp->header = bam_header_dup((const bam_header_t*)aux);
if (mode[1] == 'b') { // binary
Expand Down
25 changes: 19 additions & 6 deletions sam_view.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,26 @@
#include "sam.h"

static int g_min_mapQ = 0, g_flag_on = 0, g_flag_off = 0;
static char *g_library;

#define __g_skip_aln(b) (((b)->core.qual < g_min_mapQ) || ((b->core.flag & g_flag_on) != g_flag_on) \
|| (b->core.flag & g_flag_off))
static inline int __g_skip_aln(const bam_header_t *h, const bam1_t *b)
{
if (b->core.qual < g_min_mapQ || ((b->core.flag & g_flag_on) != g_flag_on) || (b->core.flag & g_flag_off))
return 1;
if (g_library) {
uint8_t *s = bam_aux_get(b, "RG");
if (s) {
const char *p = bam_strmap_get(h->rg2lib, s + 1);
return (p && strcmp(p, g_library) == 0)? 0 : 1;
} else return 1;
} else return 0;
}

// callback function for bam_fetch()
static int view_func(const bam1_t *b, void *data)
{
if (!__g_skip_aln(b)) samwrite((samfile_t*)data, b);
if (!__g_skip_aln(((samfile_t*)data)->header, b))
samwrite((samfile_t*)data, b);
return 0;
}

Expand All @@ -26,7 +38,7 @@ int main_samview(int argc, char *argv[])

/* parse command-line options */
strcpy(in_mode, "r"); strcpy(out_mode, "w");
while ((c = getopt(argc, argv, "Sbt:hHo:q:f:F:u")) >= 0) {
while ((c = getopt(argc, argv, "Sbt:hHo:q:f:F:ul:")) >= 0) {
switch (c) {
case 'S': is_bamin = 0; break;
case 'b': is_bamout = 1; break;
Expand All @@ -38,6 +50,7 @@ int main_samview(int argc, char *argv[])
case 'F': g_flag_off = strtol(optarg, 0, 0); break;
case 'q': g_min_mapQ = atoi(optarg); break;
case 'u': is_uncompressed = 1; break;
case 'l': g_library = strdup(optarg); break;
default: return usage();
}
}
Expand All @@ -64,7 +77,7 @@ int main_samview(int argc, char *argv[])
bam1_t *b = bam_init1();
int r;
while ((r = samread(in, b)) >= 0) // read one alignment from `in'
if (!__g_skip_aln(b))
if (!__g_skip_aln(in->header, b))
samwrite(out, b); // write the alignment to `out'
if (r < -1) fprintf(stderr, "[main_samview] truncated file.\n");
bam_destroy1(b);
Expand All @@ -91,7 +104,7 @@ int main_samview(int argc, char *argv[])

view_end:
// close files, free and return
free(fn_list); free(fn_out);
free(fn_list); free(fn_out); free(g_library);
samclose(in);
samclose(out);
return ret;
Expand Down

0 comments on commit 78de989

Please sign in to comment.