Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes involving CRAM and header API for long references. #976

Open
wants to merge 3 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 4 additions & 28 deletions header.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ typedef khash_t(rm) rmhash_t;
static int sam_hdr_link_pg(sam_hdr_t *bh);

static int sam_hrecs_vupdate(sam_hrecs_t *hrecs, sam_hrec_type_t *type, va_list ap);
static int sam_hrecs_update(sam_hrecs_t *hrecs, sam_hrec_type_t *type, ...);


#define MAX_ERROR_QUOTE 320 // Prevent over-long error messages
Expand Down Expand Up @@ -182,14 +181,10 @@ static int sam_hrecs_update_hashes(sam_hrecs_t *hrecs,
if (hrecs->ref[nref].ty == NULL) {
// Attach header line to existing stub entry.
hrecs->ref[nref].ty = h_type;
// Check lengths match; correct if not.
if (len != hrecs->ref[nref].len) {
char tmp[32];
snprintf(tmp, sizeof(tmp), "%" PRIhts_pos,
hrecs->ref[nref].len);
if (sam_hrecs_update(hrecs, h_type, "LN", tmp, NULL) < 0)
return -1;
}
// The old hrecs length may be incorrect as it is initially
// populated from h->target_len which has a max 32-bit size.
// We always take our latest data as canonical.
hrecs->ref[nref].len = len;
if (sam_hrecs_add_ref_altnames(hrecs, nref, altnames) < 0)
return -1;

Expand Down Expand Up @@ -2485,25 +2480,6 @@ int sam_hrecs_vupdate(sam_hrecs_t *hrecs, sam_hrec_type_t *type, va_list ap) {
return 0;
}

/*
* Adds or updates tag key,value pairs in a header line.
* Eg for adding M5 tags to @SQ lines or updating sort order for the
* @HD line.
*
* Specify multiple key,value pairs ending in NULL.
*
* Returns 0 on success
* -1 on failure
*/
static int sam_hrecs_update(sam_hrecs_t *hrecs, sam_hrec_type_t *type, ...) {
va_list args;
int res;
va_start(args, type);
res = sam_hrecs_vupdate(hrecs, type, args);
va_end(args);
return res;
}

/*
* Looks for a specific key in a single sam header line identified by *type.
* If prev is non-NULL it also fills this out with the previous tag, to
Expand Down
55 changes: 55 additions & 0 deletions sam.c
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,53 @@ void sam_hdr_destroy(sam_hdr_t *bh)
free(bh);
}

/*
* Create or update the header sdict field from the new
* parsed hrecs data. This is important for sam_hdr_dup or
* when reading / creating a new header.
*
* Returns 0 on success,
* -1 on failure
*/
static int sam_hdr_update_sdict(sam_hdr_t *h) {
int i;
khash_t(s2i) *long_refs = h->sdict;

if (!h->hrecs)
return 0;

for (i = 0; i < h->n_targets; i++) {
hts_pos_t len = sam_hdr_tid2len(h, i);
const char *name = sam_hdr_tid2name(h, i);
khint_t k;
if (len < UINT32_MAX) {
if (!long_refs)
continue;

// Check if we have an old length, if so remove it.
k = kh_get(s2i, long_refs, name);
if (k < kh_end(long_refs))
kh_del(s2i, long_refs, k);
}

if (!long_refs) {
if (!(h->sdict = long_refs = kh_init(s2i)))
goto err;
}

// Add / update length
int absent;
k = kh_put(s2i, long_refs, name, &absent);
if (absent < 0)
goto err;
kh_val(long_refs, k) = len;
}
return 0;

err:
return -1;
}

sam_hdr_t *sam_hdr_dup(const sam_hdr_t *h0)
{
if (h0 == NULL) return NULL;
Expand Down Expand Up @@ -179,6 +226,14 @@ sam_hdr_t *sam_hdr_dup(const sam_hdr_t *h0)
h->text[h->l_text] = '\0';
}

if (h0->sdict) {
// We have a reason to use new API, so build it so we
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jkbonfield As discussed, this would be better addressed inside sam_hdr_update_target_arrays.

// can replicate sdict.
if (sam_hdr_fill_hrecs(h) < 0 ||
sam_hdr_update_sdict(h) < 0)
goto fail;
}

return h;

fail:
Expand Down
6 changes: 6 additions & 0 deletions test/test.pl
Original file line number Diff line number Diff line change
Expand Up @@ -645,6 +645,12 @@ sub test_view
testv $opts, "./test_view $tv_args -p longrefs/longref.tmp.sam_ longrefs/longref.tmp.sam.gz";
testv $opts, "./compare_sam.pl longrefs/longref.sam longrefs/longref.tmp.sam_";

# CRAM disabled for now as the positions cannot be 32-bit. (These tests are useful for
# checking SQ headers only.)
# testv $opts, "./test_view $tv_args -C -o no_ref -p longrefs/longref.tmp.cram longrefs/longref.sam";
# testv $opts, "./test_view $tv_args -p longrefs/longref.tmp.sam_ longrefs/longref.tmp.cram";
# testv $opts, "./compare_sam.pl longrefs/longref.sam longrefs/longref.tmp.sam_";

# Build index and compare with on-the-fly one made earlier.
test_compare $opts, "$$opts{path}/test_index -c longrefs/longref.tmp.sam.gz", "longrefs/longref.tmp.sam.gz.csi.otf", "longrefs/longref.tmp.sam.gz.csi", gz=>1;

Expand Down