-
Notifications
You must be signed in to change notification settings - Fork 5
/
best.c
1788 lines (1484 loc) · 64.8 KB
/
best.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
* Best 2.2
*
* This file contains the functions
* for calculating the probability of
* gene trees given the species tree
* and the prior probability of the
* species tree
*
* Liang Liu
* Department of Statistics
* The Ohio State University
* Columbus, Ohio
*
*/
#include "bayes.h"
#include "best.h"
#include "command.h"
#include "mcmc.h"
#include "model.h"
#include "proposal.h"
#include "utils.h"
const char* const svnRevisionBestC = "$Rev: 1040 $"; /* Revision keyword which is expended/updated by svn on each commit/update */
/****************************** Local functions converted by Fredrik from BEST code *****************************/
int CompareDepths (const void *x, const void *y);
int CompareDoubles (const void *x, const void *y);
int CompareNodes (const void *x, const void *y);
int CompareNodesByX (const void *x, const void *y);
int GetSpeciesTreeFromMinDepths (Tree* speciesTree, double *depthMatrix);
int GetDepthMatrix(Tree * speciesTree, double *depthMatrix);
int GetMeanDist (Tree *speciesTree, double *depthMatrix, double *mean);
int GetMinDepthMatrix (Tree **geneTrees, int numGeneTrees, double *depthMatrix);
void LineagesIn (TreeNode* geneTreeNode, TreeNode* speciesTreeNode);
double LnPriorProbGeneTree (Tree *geneTree, double mu, Tree *speciesTree, double *popSizePtr);
double LnProposalProbSpeciesTree (Tree *speciesTree, double *depthMatrix, double expRate);
void MapGeneTreeToSpeciesTree (Tree *geneTree, Tree *speciesTree);
int ModifyDepthMatrix (double expRate, double *depthMatrix, RandLong *seed);
/* Global BEST variables */
BitsLong **speciesPairSets;
double *depthMatrix;
/* Allocate variables used by best code during mcmc */
void AllocateBestChainVariables (void)
{
int i, j, index, numUpperTriang, nLongsNeeded;
// Free if by mistake variables are already allocated
if (memAllocs[ALLOC_BEST] == YES)
FreeBestChainVariables ();
// Allocate space for upper triangular pair sets
numUpperTriang = (numSpecies * (numSpecies-1)) / 2;
nLongsNeeded = ((numSpecies - 1) / nBitsInALong) + 1;
speciesPairSets = (BitsLong **) SafeCalloc (numUpperTriang, sizeof(BitsLong *));
speciesPairSets[0] = (BitsLong *) SafeCalloc (numUpperTriang*nLongsNeeded, sizeof(BitsLong));
for (i=1; i<numUpperTriang; i++)
speciesPairSets[i] = speciesPairSets[0] + i*nLongsNeeded;
// Set upper triangular pair partitions once and for all
index = 0;
for (i=0; i<numSpecies; i++) {
for (j=i+1; j<numSpecies; j++) {
SetBit(i, speciesPairSets[index]);
SetBit(j, speciesPairSets[index]);
index++;
}
}
/* allocate species for depthMatrix */
depthMatrix = SafeCalloc (numUpperTriang, sizeof(double));
memAllocs[ALLOC_BEST] = YES;
}
/** Compare function (Depth struct) for qsort */
int CompareDepths (const void *x, const void *y) {
if ((*((Depth *)(x))).depth < (*((Depth *)(y))).depth)
return -1;
else if ((*((Depth *)(x))).depth > (*((Depth *)(y))).depth)
return 1;
else
return 0;
}
/** Compare function (doubles) for qsort */
int CompareDoubles (const void *x, const void *y) {
if (*((double *)(x)) < *((double *)(y)))
return -1;
else if (*((double *)(x)) > *((double *)(y)))
return 1;
else
return 0;
}
/** Compare function (TreeNode struct) for qsort */
int CompareNodes (const void *x, const void *y) {
if ((*((TreeNode **)(x)))->nodeDepth < (*((TreeNode**)(y)))->nodeDepth)
return -1;
else if ((*((TreeNode **)(x)))->nodeDepth > (*((TreeNode**)(y)))->nodeDepth)
return 1;
else
return 0;
}
/** Compare function (TreeNode struct; sort by x, then by nodeDepth) for qsort */
int CompareNodesByX (const void *x, const void *y) {
if ((*((TreeNode **)(x)))->x < (*((TreeNode**)(y)))->x)
return -1;
else if ((*((TreeNode **)(x)))->x > (*((TreeNode**)(y)))->x)
return 1;
else {
if ((*((TreeNode **)(x)))->nodeDepth < (*((TreeNode**)(y)))->nodeDepth)
return -1;
else if ((*((TreeNode **)(x)))->nodeDepth > (*((TreeNode**)(y)))->nodeDepth)
return 1;
else
return 0;
}
}
/**-----------------------------------------------------------------
|
| FillSpeciesTreeParams: Fill in species trees (start value)
|
------------------------------------------------------------------*/
int FillSpeciesTreeParams (RandLong *seed, int fromChain, int toChain)
{
int i, k, chn, numGeneTrees, freeBestChainVars;
Param *p;
Tree *speciesTree, **geneTrees;
// Allocate space for global best model variables used in this function, in case they are not allocated
if (memAllocs[ALLOC_BEST] == NO)
{
freeBestChainVars = YES;
AllocateBestChainVariables();
}
else
freeBestChainVars = NO;
// Use global variable numTopologies to calculate number of gene trees
// There is one topology for the species tree, the other ones are gene trees
// The number of current divisions is not safe because one gene tree can have
// several partitions, for instance if we assign the different genes on the
// mitochondrion different substitution models, or if we assign different rates
// to the codon site positions in a sequence
numGeneTrees = numTopologies - 1;
geneTrees = (Tree **) SafeCalloc (numGeneTrees, sizeof(Tree*));
// Build species trees for state 0
for (chn=fromChain; chn<toChain; chn++)
{
for (k=0; k<numParams; k++)
{
p = ¶ms[k];
if (p->paramType == P_SPECIESTREE)
{
// Find species tree and gene trees
speciesTree = GetTree(p, chn, 0);
for (i=0; i<p->nSubParams; i++)
geneTrees[i] = GetTree(p->subParams[i], chn, 0);
// Get minimum depth matrix for species tree
GetMinDepthMatrix (geneTrees, numGeneTrees, depthMatrix);
// Get a species tree from min depth matrix
GetSpeciesTreeFromMinDepths(speciesTree, depthMatrix);
assert (IsSpeciesTreeConsistent(speciesTree, chn) == YES);
// Label the tips
if (LabelTree (speciesTree, speciesNameSets[speciespartitionNum].names) == ERROR)
{
FreeBestChainVariables();
return (ERROR);
}
}
}
}
// Free gene trees
free (geneTrees);
// Free best model variables if appropriate
if (freeBestChainVars == YES)
FreeBestChainVariables();
return (NO_ERROR);
MrBayesPrint ("%lf", *seed); /* just because I am tired of seeing the unused parameter error msg */
}
/**-----------------------------------------------------------------
|
| FreeBestChainVariables: Free best variables used during an mcmc
| run.
|
------------------------------------------------------------------*/
void FreeBestChainVariables(void)
{
if (memAllocs[ALLOC_BEST] == YES) {
free (speciesPairSets[0]);
free (speciesPairSets);
speciesPairSets = NULL;
}
free (depthMatrix);
depthMatrix = NULL;
memAllocs[ALLOC_BEST] = NO;
}
/**---------------------------------------------------------------------
|
| GetDepthMatrix:
|
| This algorithm calculates the upper triangular depth matrix for the
| species tree. Time complexity O(n^2).
|
| @param speciesTree The species tree (in)
| @param depthMatrix The minimum depth matrix, upper triangular array (out)
| @returns Returns ERROR or NO_ERROR
----------------------------------------------------------------------*/
int GetDepthMatrix (Tree *speciesTree, double *depthMatrix) {
int i, left, right, numUpperTriang, index, nLongsNeeded, freeBitsets;
double maxDepth;
TreeNode *p;
// Make sure we have bitfields allocated and set
freeBitsets = NO;
if (speciesTree->bitsets == NULL)
{
AllocateTreePartitions(speciesTree);
freeBitsets = YES;
}
else
{
ResetTreePartitions(speciesTree); // just in case
freeBitsets = NO;
}
// Calculate number of values in the upper triangular matrix
numUpperTriang = numSpecies * (numSpecies - 1) / 2;
// Number of longs needed in a bitfield representing a species set
nLongsNeeded = ((numSpecies -1) / nBitsInALong) + 1;
// Set all cells to max
maxDepth = speciesTree->root->left->nodeDepth; // root depth
for (i=0; i<numUpperTriang; i++)
depthMatrix[i] = maxDepth;
// Loop over interior nodes
for (i=0; i<speciesTree->nIntNodes; i++)
{
p = speciesTree->intDownPass[i];
for (left = FirstTaxonInPartition(p->left->partition, nLongsNeeded); left < numSpecies; left = NextTaxonInPartition(left, p->left->partition, nLongsNeeded))
{
for (right = FirstTaxonInPartition(p->right->partition, nLongsNeeded); right < numSpecies; right = NextTaxonInPartition(right, p->right->partition, nLongsNeeded))
{
index = UpperTriangIndex(left, right, numSpecies);
depthMatrix[index] = p->nodeDepth;
}
}
}
// Free partitions if appropriate
if (freeBitsets == YES)
FreeTreePartitions(speciesTree);
return (NO_ERROR);
}
/**---------------------------------------------------------------------
|
| GetMeanDist
|
| This algorithm calculates the mean distance between a distance matrix
| and the minimum depths that define a species tree.
|
| @param speciesTree The species tree (in)
| @param minDepthMatrix The minimum depth matrix, upper triangular array (in)
| @param mean The mean distance (out)
| @returns Returns ERROR or NO_ERROR
----------------------------------------------------------------------*/
int GetMeanDist (Tree *speciesTree, double *minDepthMatrix, double *mean) {
int i, left, right, index, nLongsNeeded, freeBitsets;
double dist, minDist=0.0, distSum;
TreeNode *p;
// Make sure we have bitfields allocated and set
freeBitsets = NO;
if (speciesTree->bitsets == NULL)
{
AllocateTreePartitions(speciesTree);
freeBitsets = YES;
}
else
{
ResetTreePartitions(speciesTree); // just in case
freeBitsets = NO;
}
// Number of longs needed in a bitfield representing a species set
nLongsNeeded = ((numSpecies -1) / nBitsInALong) + 1;
// Loop over interior nodes
distSum = 0.0;
for (i=0; i<speciesTree->nIntNodes; i++)
{
p = speciesTree->intDownPass[i];
p->x = 0;
while ((left=FirstTaxonInPartition(p->left->partition, nLongsNeeded)) < numSpecies)
{
while ((right=FirstTaxonInPartition(p->right->partition, nLongsNeeded)) < numSpecies)
{
p->x++;
index = UpperTriangIndex(left, right, numSpecies);
dist = depthMatrix[index] - p->nodeDepth;
if (p->x == 1)
minDist = dist;
else if (dist < minDist)
minDist = dist;
ClearBit(right, p->right->partition);
}
ClearBit(left, p->left->partition);
}
distSum += minDist;
}
(*mean) = distSum / speciesTree->nIntNodes;
// Reset partitions that were destroyed above or free partitions, as appropriate
if (freeBitsets == YES)
FreeTreePartitions(speciesTree);
else
ResetTreePartitions(speciesTree);
return (NO_ERROR);
MrBayesPrint ("%lf", *minDepthMatrix); /* just because I am tired of seeing the unused parameter error msg */
}
/**---------------------------------------------------------------------
|
| GetMinDepthMatrix: converted from GetMinDists.
|
| This algorithm scans the gene trees and calculates the minimum depth
| (height) separating species across gene trees. The complexity of the
| original algorithm was O(mn^3), where m is the number of gene trees and
| n is the number of taxa in each gene tree. I think this algorithm has
| complexity that is better on average, but the difference is small.
|
| I have rewritten the algorithm also to show alternative techniques that
| could be used in this and other BEST algorithms.
|
| @param geneTrees The gene trees (in)
| @param depthMatrix The minimum depth matrix, upper triangular array (out)
| @returns Returns ERROR or NO_ERROR
----------------------------------------------------------------------*/
int GetMinDepthMatrix (Tree **geneTrees, int numGeneTrees, double *depthMatrix) {
int i, j, w, nLongsNeeded, numUpperTriang, index, trace=0;
double maxDepth;
TreeNode *p;
BitsLong **speciesSets;
// Allocate space for species partitions
nLongsNeeded = ((numSpecies -1) / nBitsInALong) + 1; // number of longs needed in a bitfield representing a species set
speciesSets = (BitsLong **) SafeCalloc ((2*(size_t)numLocalTaxa-1), sizeof(BitsLong *));
speciesSets[0] = (BitsLong *) SafeCalloc ((2*(size_t)numLocalTaxa-1)*nLongsNeeded, sizeof(BitsLong));
for (i=1; i<2*numLocalTaxa-1; i++)
speciesSets[i] = speciesSets[0] + i*nLongsNeeded;
// Set tip species partitions once and for all
for (i=0; i<numLocalTaxa; i++)
SetBit(speciespartitionId[i][speciespartitionNum]-1, speciesSets[i]);
// Set initial max depth for upper triangular matrix
numUpperTriang = (numSpecies * (numSpecies - 1)) / 2;
maxDepth = geneTrees[0]->root->left->nodeDepth;
for (i=0; i<numUpperTriang; i++)
depthMatrix[i] = maxDepth;
// Now we are ready to cycle over gene trees
for (w=0; w<numGeneTrees; w++)
{
if (trace) {
printf ("\nGene %d\n",w);
ShowTree(geneTrees[w]);
}
// Set species sets for interior nodes. O(n)
for (i=0; i<geneTrees[w]->nIntNodes; i++) {
p = geneTrees[w]->intDownPass[i];
for (j=0; j<nLongsNeeded; j++)
speciesSets[p->index][j] = speciesSets[p->left->index][j] | speciesSets[p->right->index][j];
}
// Now order the interior nodes in terms of node depth. We rely on the fact that the
// ordered sequence is a valid downpass sequence. O(log n).
qsort((void *)(geneTrees[w]->intDownPass), (size_t)(geneTrees[w]->nIntNodes), sizeof(TreeNode *), CompareNodes);
// Finally find the minimum for each cell in the upper triangular matrix
// This is the time critical step with complexity O(n^3) in the simplest
// algorithm version. This algorithm should do a little better in most cases.
for (i=0; i<numUpperTriang; i++) {
// Find shallowest node that has the pair
for (j=0; j<geneTrees[w]->nIntNodes; j++) {
p = geneTrees[w]->intDownPass[j];
// Because nodes are ordered in time, if this test is true then we cannot beat the minimum
if (p->nodeDepth > depthMatrix[i])
break;
// Check whether the node is a candidate minimum for the species pair
// If the test is true, we know from the test above that p->nodeDepth is
// either a tie or the new minimum
if (IsPartNested(speciesPairSets[i], speciesSets[p->index], nLongsNeeded) == YES) {
depthMatrix[i] = p->nodeDepth;
break;
}
}
}
} // Next gene tree
if (trace)
{
index = 0;
printf ("Mindepth matrix\n");
for (i=0;i<numSpecies;i++) {
for (j=0; j<i; j++)
printf (" ");
for (j=i+1;j<numSpecies;j++) {
printf ("%.6f ",depthMatrix[index]);
index++;
}
printf ("\n");
}
printf ("\n");
}
free (speciesSets[0]);
free (speciesSets);
return (NO_ERROR);
}
/**---------------------------------------------------------------------
|
| GetSpeciesTreeFromMinDepths: converted from GetConstraints, Startsptree,
| and MaximumTree.
|
| This is a clustering algorithm based on minimum depths for species pairs.
| It reduces an n choose 2 upper triangular min depth matrix to an array
| of n-1 node depths, which fit onto a tree.
|
| @param speciesTree The species tree to be filled (out)
| @param depthMatrix The min depth matrix, upper triangular array (in)
| @returns Returns NO_ERROR if success, ERROR if negative brlens occur
----------------------------------------------------------------------*/
int GetSpeciesTreeFromMinDepths (Tree* speciesTree, double *depthMatrix) {
int i, j, numUpperTriang, nLongsNeeded, index, nextNodeIndex;
Depth *minDepth;
PolyTree *polyTree;
PolyNode *p, *q, *r, *u, *qPrev, *rPrev;
nLongsNeeded = ((numSpecies - 1) / nBitsInALong) + 1;
numUpperTriang = numSpecies*(numSpecies - 1) / 2;
minDepth = (Depth *) SafeCalloc (numUpperTriang, sizeof(Depth));
// Convert depthMatrix to an array of Depth structs
index = 0;
for (i=0; i<numSpecies; i++) {
for (j=i+1; j<numSpecies; j++) {
minDepth[index].depth = depthMatrix[index];
minDepth[index].pairSet = speciesPairSets[index];
index++;
}
}
// Sort the array of distance structs (O(log n^2))
qsort((void *)(minDepth), (size_t)(numUpperTriang), sizeof(Depth), CompareDepths);
// The algorithm below reduces the upper triangular matrix (n choose 2) to an n-1
// array in O(n^2log(n)) time. We build the tree at the same time, since we can
// find included pairs in the tree in log(n) time. We use a polytomous tree for this.
// Allocate space for polytomous tree and set up partitions
polyTree = AllocatePolyTree(numSpecies);
AllocatePolyTreePartitions(polyTree);
// Build initial tree (a bush)
polyTree->isRooted = YES;
polyTree->isClock = YES;
polyTree->root = &polyTree->nodes[2*numSpecies-2];
for (i=0; i<numSpecies; i++) {
p = &polyTree->nodes[i];
p->index = i;
p->depth = 0.0;
p->left = NULL;
if (i<numSpecies-1)
p->sib = &polyTree->nodes[i+1];
else
p->sib = NULL;
p->anc = polyTree->root;
}
p = polyTree->root;
p->index = 2*numSpecies - 2;
p->left = &polyTree->nodes[0];
p->sib = NULL;
p->anc = NULL;
p->depth = -1.0;
polyTree->nNodes = numSpecies + 1;
polyTree->nIntNodes = 1;
GetPolyDownPass(polyTree);
ResetPolyTreePartitions(polyTree); /* set bitsets (partitions) for initial tree */
// Resolve bush using sorted depth structs
nextNodeIndex = numSpecies;
for (i=0; i<numUpperTriang; i++) {
// Find tip corresponding to first taxon in pair
p = &polyTree->nodes[FirstTaxonInPartition(minDepth[i].pairSet, nLongsNeeded)];
// Descend tree until we find a node within which the pair set is nested
do {
p = p->anc;
}
while (!IsPartNested(minDepth[i].pairSet, p->partition, nLongsNeeded));
if (p->left->sib->sib != NULL) {
// This node is still a polytomy
// Find left and right descendants of new node
qPrev = NULL;
for (q=p->left; IsSectionEmpty(q->partition, minDepth[i].pairSet, nLongsNeeded); q=q->sib)
qPrev = q;
rPrev = q;
for (r=q->sib; IsSectionEmpty(r->partition, minDepth[i].pairSet, nLongsNeeded); r=r->sib)
rPrev = r;
// Introduce the new node
u = &polyTree->nodes[nextNodeIndex];
u->index = nextNodeIndex;
nextNodeIndex++;
polyTree->nIntNodes++;
polyTree->nNodes++;
u->left = q;
u->anc = p;
if (p->left == q)
p->left = u;
else
qPrev->sib = u;
// former upstream sibling to r should point to r->sib
if (rPrev == q)
u->sib = r->sib;
else
rPrev->sib = r->sib;
if (q->sib == r)
u->sib = r->sib;
else
u->sib = q->sib;
u->depth = minDepth[i].depth; // because minDepth structs are sorted, we know this is the min depth
assert (u->depth > 0.0);
// Create new taxon set with bitfield operations
for (j=0; j<nLongsNeeded; j++)
u->partition[j] = q->partition[j] | r->partition[j];
// Patch the tree together with the new node added
q->sib = r;
r->sib = NULL;
q->anc = u;
r->anc = u;
}
else if (p == polyTree->root && p->depth < 0.0) {
// This is the first time we hit the root of the tree && it is resolved
p->depth = minDepth[i].depth;
assert (p->depth > 0.0);
}
// other cases should not be added to tree
}
// Make sure we have a complete species tree
assert (polyTree->nIntNodes == numSpecies - 1);
// Set traversal sequences
GetPolyDownPass(polyTree);
// Set branch lengths from node depths (not done automatically for us)
// Make sure all branch lengths are nonnegative (we can have 0.0 brlens, they
// should not be problematic in a species tree; they occur when there are
// ties in the min depth matrix that have not been modified by the move)
for (i=0; i<polyTree->nNodes; i++) {
p = polyTree->allDownPass[i];
if (p->anc == NULL)
p->length = 0.0;
else
p->length = p->anc->depth - p->depth;
if (p->length < 0.0) {
FreePolyTree(polyTree);
free (minDepth);
return (ERROR);
}
}
// Copy to species tree from polytomous tree
CopyToSpeciesTreeFromPolyTree (speciesTree, polyTree);
// Free locally allocated variables
FreePolyTree(polyTree);
free (minDepth);
return(NO_ERROR);
}
/**---------------------------------------------------------------------------------------
|
| IsSpeciesTreeConsistent: Called when user tries to set a species tree or when
| attempting to use a species tree from a check point as starting value.
|
-----------------------------------------------------------------------------------------*/
int IsSpeciesTreeConsistent (Tree *speciesTree, int chain)
{
int i, answer, numGeneTrees, numUpperTriang, freeBestVars;
double *speciesTreeDepthMatrix;
Tree **geneTrees;
answer = NO;
freeBestVars = NO;
if (memAllocs[ALLOC_BEST] == NO)
{
AllocateBestChainVariables();
freeBestVars = YES;
}
numGeneTrees = numTrees - 1;
geneTrees = (Tree **) SafeCalloc (numGeneTrees, sizeof(Tree *));
for (i=0; i<numTrees-1; i++)
geneTrees[i] = GetTreeFromIndex(i, chain, state[chain]);
numUpperTriang = numSpecies * (numSpecies - 1) / 2;
speciesTreeDepthMatrix = (double *) SafeCalloc (numUpperTriang, sizeof(double));
GetMinDepthMatrix(geneTrees, numGeneTrees, depthMatrix);
GetDepthMatrix(speciesTree, speciesTreeDepthMatrix);
for (i=0; i<numUpperTriang; i++)
{
if (depthMatrix[i] < speciesTreeDepthMatrix[i])
break;
}
if (i == numUpperTriang)
answer = YES;
else
answer = NO;
if (answer == NO)
ShowNodes(speciesTree->root, 0, YES);
if (freeBestVars == YES)
FreeBestChainVariables();
free (speciesTreeDepthMatrix);
free (geneTrees);
return answer;
}
/**---------------------------------------------------------------------------------------
|
| LineagesIn: Recursive function to get number of gene tree lineages coming into each
| branch of the species tree (in ->x of speciestree nodes). We also assign each gene
| tree lineage to the corresponding species tree lineage (in ->x of genetree nodes).
| Finally, number of coalescent events is recorded (in ->y of speciestree nodes).
| Time complexity is O(n).
|
-----------------------------------------------------------------------------------------*/
void LineagesIn (TreeNode *geneTreeNode, TreeNode *speciesTreeNode)
{
int nLongsNeeded;
if (geneTreeNode->nodeDepth < speciesTreeNode->nodeDepth) {
// climb up species tree
if (speciesTreeNode->left == NULL) {
assert (geneTreeNode->left == NULL);
speciesTreeNode->x++;
}
else {
nLongsNeeded = (numSpecies - 1) / nBitsInALong + 1;
speciesTreeNode->x++;
if (IsPartNested(geneTreeNode->partition, speciesTreeNode->left->partition, nLongsNeeded) == YES)
LineagesIn (geneTreeNode, speciesTreeNode->left);
else if (IsPartNested(geneTreeNode->partition, speciesTreeNode->right->partition, nLongsNeeded) == YES)
LineagesIn (geneTreeNode, speciesTreeNode->right);
}
}
else {
// climb up gene tree
if (geneTreeNode->left != NULL)
LineagesIn(geneTreeNode->left, speciesTreeNode);
if (geneTreeNode->right != NULL)
LineagesIn(geneTreeNode->right, speciesTreeNode);
if (geneTreeNode->left == NULL) {
speciesTreeNode->x++;
assert (speciesTreeNode->left == NULL);
}
else {
speciesTreeNode->y++;
}
geneTreeNode->x = speciesTreeNode->index;
}
}
/**-----------------------------------------------------------------
|
| LnSpeciesTreeProb: Wrapper for LnJointGeneTreeSpeciesTreePr to
| free calling functions from retrieving gene and species trees.
|
------------------------------------------------------------------*/
double LnSpeciesTreeProb(int chain)
{
int i, numGeneTrees;
double lnProb;
Tree **geneTrees, *speciesTree;
ModelInfo *m;
m = &modelSettings[0];
speciesTree = GetTree(m->speciesTree, chain, state[chain]);
numGeneTrees = m->speciesTree->nSubParams;
geneTrees = (Tree **) SafeCalloc (numGeneTrees, sizeof(Tree *));
for (i=0; i<m->speciesTree->nSubParams; i++)
geneTrees[i] = GetTree(m->speciesTree->subParams[i], chain, state[chain]);
lnProb = LnJointGeneTreeSpeciesTreePr(geneTrees, numGeneTrees, speciesTree, chain);
free (geneTrees);
return lnProb;
}
/**-----------------------------------------------------------------
|
| LnJointGeneTreeSpeciesTreePr: Converted from LnJointGenetreePr,
| SPLogLike, SPLogPrior.
|
| In this function we calculate the entire probability of the species
| tree, including its probability given its priors, and the probability
| of the gene trees given the species tree.
|
------------------------------------------------------------------*/
double LnJointGeneTreeSpeciesTreePr(Tree **geneTrees, int numGeneTrees, Tree *speciesTree, int chain)
{
double lnPrior, lnLike, clockRate, mu, *popSizePtr, sR, eR, sF;
int i;
ModelInfo *m;
ModelParams *mp;
// Get model info for species tree
m = &modelSettings[speciesTree->relParts[0]];
// Get model params for species tree
mp = &modelParams[speciesTree->relParts[0]];
// Get popSize ptr
popSizePtr = GetParamVals(m->popSize, chain, state[chain]);
// Get clock rate
if (speciesTree->isCalibrated == YES)
clockRate = *GetParamVals(m->clockRate, chain, state[chain]);
else
clockRate = 1.0;
// Calculate probability of gene trees given species tree
// ShowNodes(speciesTree->root, 0, YES);
lnLike = 0.0;
mu = clockRate;
for (i=0; i<numGeneTrees; i++) {
lnLike += LnPriorProbGeneTree(geneTrees[i], mu, speciesTree, popSizePtr);
}
// Calculate probability of species tree given its priors
if (strcmp(mp->speciesTreeBrlensPr, "Birthdeath") == 0) {
sR = *GetParamVals(m->speciationRates, chain, state[chain]);
eR = *GetParamVals(m->extinctionRates, chain, state[chain]);
sF = mp->sampleProb;
lnPrior = 0.0;
LnBirthDeathPriorPr(speciesTree, clockRate, &lnPrior, sR, eR, mp->sampleStrat, sF);
}
else
lnPrior = 0.0;
// The population size is taken care of elsewhere
return lnLike + lnPrior;
}
/**-----------------------------------------------------------------
|
| LnPriorProbGeneTree: Calculate the prior probability of a gene
| tree.
|
------------------------------------------------------------------*/
double LnPriorProbGeneTree (Tree *geneTree, double mu, Tree *speciesTree, double *popSizePtr)
{
int i, k, index, nEvents, trace=0;
double N, lnProb, ploidyFactor, theta, timeInterval;
TreeNode *p, *q=NULL, *r;
ModelParams *mp;
// Get model params
mp = &modelParams[speciesTree->relParts[0]];
// Find ploidy setting
if (strcmp(mp->ploidy, "Diploid") == 0)
ploidyFactor = 4.0;
else if (strcmp(mp->ploidy, "Haploid") == 0)
ploidyFactor = 2.0;
else /* if (strcmp(mp->ploidy, "Zlinked") == 0) */
ploidyFactor = 3.0;
// Initialize species tree with theta in d
for (i=0; i<speciesTree->nNodes-1; i++) {
p = speciesTree->allDownPass[i];
if (strcmp(mp->popVarPr, "Equal") != 0)
N = popSizePtr[p->index];
else
N = popSizePtr[0];
p->d = ploidyFactor * N * mu;
}
// Map gene tree to species tree
MapGeneTreeToSpeciesTree(geneTree, speciesTree);
// Sort gene tree interior nodes first by speciestree branch on which they coalesce, then in time order
qsort((void *)(geneTree->intDownPass), (size_t)(geneTree->nIntNodes), sizeof(TreeNode *), CompareNodesByX);
// Debug output of qsort result
if (trace) {
printf ("index -- x -- nodeDepth for gene tree\n");
for (i=0; i<geneTree->nIntNodes; i++)
printf ("%d -- %d -- %e\n", geneTree->intDownPass[i]->index, geneTree->intDownPass[i]->x, geneTree->intDownPass[i]->nodeDepth);
}
// Now calculate probability after making sure species tree nodes appear in index order
// (the order does not have to be a correct downpass sequence)
for (i=0; i<speciesTree->memNodes; i++)
{
p = &(speciesTree->nodes[i]);
speciesTree->allDownPass[p->index] = p;
}
index = 0;
lnProb = 0.0;
for (i=0; i<speciesTree->nNodes-1; i++) {
p = speciesTree->allDownPass[i];
// Get theta
theta = p->d;
// Get number of events
nEvents = p->y;
// Calculate probability
lnProb += nEvents * log (2.0 / theta);
for (k=p->x; k > p->x - p->y; k--) {
q = geneTree->intDownPass[index];
assert (q->x == p->index);
if (k == p->x)
timeInterval = q->nodeDepth - p->nodeDepth;
else {
r = geneTree->intDownPass[index-1];
timeInterval = q->nodeDepth - r->nodeDepth;
}
lnProb -= (k * (k - 1) * timeInterval) / theta;
index++;
}
if (p->x - p->y > 1) {
if (nEvents == 0)
timeInterval = p->anc->nodeDepth - p->nodeDepth;
else
timeInterval = p->anc->nodeDepth - q->nodeDepth;
assert (p->anc->anc != NULL);
assert (timeInterval >= 0.0);
k = p->x - p->y;
lnProb -= (k * (k - 1) * timeInterval) / theta;
}
}
// Restore downpass sequences (probably not necessary for gene tree, but may be if some
// code relies on intDownPass and allDownPass to be in same order)
GetDownPass(speciesTree);
GetDownPass(geneTree);
// Free space
FreeTreePartitions(speciesTree);
FreeTreePartitions(geneTree);
return lnProb;
}
/**---------------------------------------------------------------------
|
| LnProposalProbSpeciesTree:
|
| This algorithm calculates the probability of proposing a particular
| species tree given a distance matrix modified using a scheme based on
| truncated exponential distributions with rate expRate.
|
| @param speciesTree The species tree (in)
| @param depthMatrix The minimum depth matrix, upper triangular array (in)
| @param expRate Rate of truncated exponential distribution
| @returns Returns probability of proposing the species tree
----------------------------------------------------------------------*/
double LnProposalProbSpeciesTree (Tree *speciesTree, double *depthMatrix, double expRate)
{
int i, left, right, index, nLongsNeeded, freeBitsets;
double dist, normConst=1.0, negLambdaX=0.0, eNegLambdaX, density, prob,
sumDensRatio, prodProb, lnProb;
TreeNode *p;
// Make sure we have bitfields allocated and set
freeBitsets = NO;
if (speciesTree->bitsets == NULL)
freeBitsets = YES;
else
freeBitsets = NO;
AllocateTreePartitions(speciesTree);
// Number of longs needed in a bitfield representing a species set
nLongsNeeded = ((numSpecies -1) / nBitsInALong) + 1;
// Loop over interior nodes
lnProb = 0.0;
for (i=0; i<speciesTree->nIntNodes; i++)
{
p = speciesTree->intDownPass[i];
p->x = 0;
sumDensRatio = 0.0;
prodProb = 1.0;
for (left = FirstTaxonInPartition(p->left->partition, nLongsNeeded); left < numSpecies; left = NextTaxonInPartition(left, p->left->partition, nLongsNeeded))
{
for (right = FirstTaxonInPartition(p->right->partition, nLongsNeeded); right < numSpecies; right = NextTaxonInPartition(right, p->right->partition, nLongsNeeded))
{
p->x++;
index = UpperTriangIndex(left, right, numSpecies); assert (index < numSpecies*(numSpecies - 1) / 2);
dist = depthMatrix[index] - p->nodeDepth; // distance between depth matrix entry and actual species-tree node
normConst = 1.0 - exp(-expRate * depthMatrix[index]); // normalization constant because of truncation of exp distribution
negLambdaX = - expRate * dist;
eNegLambdaX = exp(negLambdaX);
density = expRate * eNegLambdaX / normConst; // density for x == dist, f(dist)
prob = (1.0 - eNegLambdaX) / normConst; // cumulative prob for x <= dist, F(dist)
sumDensRatio += density / prob; // warning: if dist==0, prob is ZERO!
prodProb *= prob;
}
}
if (p->x == 1)
lnProb += log(expRate) + negLambdaX - log(normConst);
else