Skip to content

Commit

Permalink
Fix Issue ultimatesource#45 - Multiple families in ped file
Browse files Browse the repository at this point in the history
- Merge the family ID and the individual ID in `ped.h` to make each ID unique. Use a "unique/unusual/obscure" delimiter, `_F@I_`, to concatenate the two IDs, so that every entry in `table_` is unique. The `id()` function takes care of the rest.
  - A simple delimiter (`_`, `@`, etc.) is avoided to guard against the case where a family name is something like `*f1@` or `*f1_`.
- Update the related unit tests.
  • Loading branch information
stevenhwu committed Oct 4, 2016
1 parent 8a5b064 commit 1caba64
Show file tree
Hide file tree
Showing 5 changed files with 36 additions and 29 deletions.
10 changes: 5 additions & 5 deletions Tests/DngCall/BamTest.cmake.in
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ set(TagLB-CMD @DNG_CALL_EXE@ --rgtag "LB" -f sample-5.3_ref.fasta.gz -p ceu.ped
set(TagLB-WD "@TESTDATA_DIR@/sample_5_3/")
set(TagLB-RESULT 0)
set(TagLB-STDOUT
"FORMAT\tGL-1\tGL-2\tLB-NA12878-Solexa-135852\tLB-NA12891-Solexa-135851\tLB-NA12892-Solexa-135853"
"FORMAT\tGL-1_F@I_1\tGL-1_F@I_2\tLB-NA12878-Solexa-135852\tLB-NA12891-Solexa-135851\tLB-NA12892-Solexa-135853"
)

###############################################################################
Expand All @@ -31,7 +31,7 @@ set(TagSM-CMD @DNG_CALL_EXE@ --rgtag "SM" -f sample-5.3_ref.fasta.gz -p ceu.ped
set(TagSM-WD "@TESTDATA_DIR@/sample_5_3/")
set(TagSM-RESULT 0)
set(TagSM-STDOUT
"FORMAT\tGL-1\tGL-2\tLB-NA12878\tLB-NA12891\tLB-NA12892"
"FORMAT\tGL-1_F@I_1\tGL-1_F@I_2\tLB-NA12878\tLB-NA12891\tLB-NA12892"
)

###############################################################################
Expand All @@ -41,7 +41,7 @@ set(TagID-CMD @DNG_CALL_EXE@ --rgtag "ID" -f sample-5.3_ref.fasta.gz -p ceu.ped
set(TagID-WD "@TESTDATA_DIR@/sample_5_3/")
set(TagID-RESULT 0)
set(TagID-STDOUT
"FORMAT\tGL-1\tGL-2\tSM-NA12891\tSM-NA12892\tSM-NA12878\tLB-NA12878-H06HD.1\tLB-NA12878-H06HD.2\tLB-NA12878-H06JU\\.1\tLB-NA12891-H03N7\\.1\tLB-NA12891-H03N7\\.2\tLB-NA12891-H05F1\\.2\tLB-NA12892-H06JH\\.1\tLB-NA12892-H06JH\\.2\tLB-NA12892-H06JU\\.2"
"FORMAT\tGL-1_F@I_1\tGL-1_F@I_2\tSM-NA12891\tSM-NA12892\tSM-NA12878\tLB-NA12878-H06HD.1\tLB-NA12878-H06HD.2\tLB-NA12878-H06JU\\.1\tLB-NA12891-H03N7\\.1\tLB-NA12891-H03N7\\.2\tLB-NA12891-H05F1\\.2\tLB-NA12892-H06JH\\.1\tLB-NA12892-H06JH\\.2\tLB-NA12892-H06JU\\.2"
)

###############################################################################
Expand All @@ -51,14 +51,14 @@ set(SepHeader1-CMD @DNG_CALL_EXE@ -f sample-5.3_ref.fasta.gz -p ceu.ped -m 0.001
set(SepHeader1-WD "@TESTDATA_DIR@/sep_header/")
set(SepHeader1-RESULT 0)
set(SepHeader1-STDOUT
"FORMAT\tGL-1\tGL-2\tLB-NA12878-Solexa-135852\tLB-NA12891-Solexa-135851\tLB-NA12892-Solexa-135853"
"FORMAT\tGL-1_F@I_1\tGL-1_F@I_2\tLB-NA12878-Solexa-135852\tLB-NA12891-Solexa-135851\tLB-NA12892-Solexa-135853"
)

set(SepHeader2-CMD @DNG_CALL_EXE@ -f sample-5.3_ref.fasta.gz -p ceu.ped -m 0.001 -h test1_hdr.sam test1_nohdr.sam)
set(SepHeader2-WD "@TESTDATA_DIR@/sep_header/")
set(SepHeader2-RESULT 0)
set(SepHeader2-STDOUT
"FORMAT\tGL-1\tGL-2\tLB-NA12878-Solexa-135852\tLB-NA12891-Solexa-135851\tLB-NA12892-Solexa-135853"
"FORMAT\tGL-1_F@I_1\tGL-1_F@I_2\tLB-NA12878-Solexa-135852\tLB-NA12891-Solexa-135851\tLB-NA12892-Solexa-135853"
)

###############################################################################
Expand Down
32 changes: 16 additions & 16 deletions Tests/Unit/dng/relationship_graph_m12.cc
Original file line number Diff line number Diff line change
Expand Up @@ -133,11 +133,11 @@ BOOST_FIXTURE_TEST_CASE(test_constructor, FixturePedigreeMid ) {
auto labels = relationship_graph.labels();

const std::vector<std::string> expected_labels = {
"GL-1", "GL-2",
"GL-4", "GL-5",
"GL-9", "GL-10",
"GL-3", "GL-6",
"GL-11", "GL-8",
"GL-1_F@I_1", "GL-1_F@I_2",
"GL-1_F@I_4", "GL-1_F@I_5",
"GL-1_F@I_9", "GL-1_F@I_10",
"GL-1_F@I_3", "GL-1_F@I_6",
"GL-1_F@I_11", "GL-1_F@I_8",
"LB-NA12001:Solexa-001", "LB-NA12002:Solexa-002",
"LB-NA12003:Solexa-003", "LB-NA12004:Solexa-004",
"LB-NA12005:Solexa-005", "LB-NA12006:Solexa-006",
Expand Down Expand Up @@ -427,12 +427,12 @@ BOOST_FIXTURE_TEST_CASE(test_add_lib_from_rgs, FixturePedigreeMid) {

std::vector<std::string> expected_vertex{
"GL-unknown",
"GL-1", "GL-2",
"GL-4", "GL-5",
"GL-9", "GL-10",
"GL-3", "GL-6", "GL-11",
"GL-7", "GL-8",
"GL-12",
"GL-1_F@I_1", "GL-1_F@I_2",
"GL-1_F@I_4", "GL-1_F@I_5",
"GL-1_F@I_9", "GL-1_F@I_10",
"GL-1_F@I_3", "GL-1_F@I_6", "GL-1_F@I_11",
"GL-1_F@I_7", "GL-1_F@I_8",
"GL-1_F@I_12",

"SM-NA12001", "SM-NA12002",
"SM-NA12004", "SM-NA12005",
Expand Down Expand Up @@ -654,11 +654,11 @@ BOOST_FIXTURE_TEST_CASE(test_update_labels_node_ids, FixturePedigreeMid) {
};

std::vector<std::string> expected_labels {
"GL-1", "GL-2",
"GL-4", "GL-5",
"GL-9", "GL-10",
"GL-3", "GL-6", "GL-11",
"GL-8",
"GL-1_F@I_1", "GL-1_F@I_2",
"GL-1_F@I_4", "GL-1_F@I_5",
"GL-1_F@I_9", "GL-1_F@I_10",
"GL-1_F@I_3", "GL-1_F@I_6", "GL-1_F@I_11",
"GL-1_F@I_8",

"LB-NA12001:Solexa-001", "LB-NA12002:Solexa-002",
"LB-NA12003:Solexa-003", "LB-NA12004:Solexa-004",
Expand Down
14 changes: 7 additions & 7 deletions Tests/Unit/dng/relationship_graph_trio.cc
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,8 @@ BOOST_FIXTURE_TEST_CASE(test_constructor, FixturePedigree ) {
auto labels = relationship_graph.labels();

const std::vector<std::string> expected_labels = {
"GL-1", // founder 1
"GL-2", // founder 2
"GL-1_F@I_1", // founder 1
"GL-1_F@I_2", // founder 2
"LB-NA12878:Solexa-135852", // lib 1
"LB-NA12891:Solexa-135851", // lib 2
"LB-NA12892:Solexa-135853" // lib 3
Expand Down Expand Up @@ -239,9 +239,9 @@ BOOST_FIXTURE_TEST_CASE(test_add_lib_from_rgs, ReadTrioFromFile) {

std::vector<std::string> expected_vertex{
"GL-unknown",
"GL-1",
"GL-2",
"GL-3",
"GL-1_F@I_1",
"GL-1_F@I_2",
"GL-1_F@I_3",
"SM-NA12891",
"SM-NA12892",
"SM-NA12878",
Expand Down Expand Up @@ -376,8 +376,8 @@ BOOST_FIXTURE_TEST_CASE(test_update_labels_node_ids, ReadTrioFromFile) {
S_MAX,S_MAX,S_MAX,S_MAX,
2, 3, 4};
std::vector<std::string> expected_labels {
"GL-1",
"GL-2",
"GL-1_F@I_1",
"GL-1_F@I_2",
"LB-NA12878:Solexa-135852",
"LB-NA12891:Solexa-135851",
"LB-NA12892:Solexa-135853"
Expand Down
7 changes: 7 additions & 0 deletions src/include/dng/io/ped.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
/*
* Copyright (c) 2014-2015 Reed A. Cartwright
* Copyright (c) 2016 Steven H. Wu
* Authors: Reed A. Cartwright <[email protected]>
* Steven H. Wu <[email protected]>
*
* This file is part of DeNovoGear.
*
Expand Down Expand Up @@ -40,6 +42,8 @@ namespace dng {
namespace io {

class Pedigree {
const std::string FAM_IND_DELIM = "_F@I_";

public:
typedef boost::multi_index_container<std::string,
boost::multi_index::indexed_by<
Expand Down Expand Up @@ -127,6 +131,9 @@ class Pedigree {
map<string, size_t> child_names;
child_names.emplace("", 0);
for(k = 1; k < string_table.size(); ++k) {
string_table[k][1] = string_table[k][0] + FAM_IND_DELIM + string_table[k][1];
string_table[k][2] = string_table[k][0] + FAM_IND_DELIM + string_table[k][2];
string_table[k][3] = string_table[k][0] + FAM_IND_DELIM + string_table[k][3];
bool success = child_names.emplace(string_table[k][1], k).second;
// If child name is duplicate, erase it
if(!success) {
Expand Down
2 changes: 1 addition & 1 deletion src/include/dng/relationship_graph.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
#include <dng/peeling.h>
#include <dng/detail/unit_test.h>

#define DEBUG_RGRAPH 1
//#define DEBUG_RGRAPH 1

namespace dng {

Expand Down

0 comments on commit 1caba64

Please sign in to comment.