-
Notifications
You must be signed in to change notification settings - Fork 1
Usage as Python library
MrTomRod edited this page Aug 29, 2022
·
2 revisions
Python bindings to the pairwise comparisons algorithm, as described in Read (1995), Maddison (2000) and Brynildsrud (2016).
from pprint import pprint
from scoary import ScoaryTree, pick_single, print_tree
# Phylogeny as a nested list: inner lists group isolates, strings are leaf labels.
tree = [
    ['isolate1', 'isolate2'],
    [
        ['isolate3', 'isolate4'],
        ['isolate5', 'isolate6'],
    ],
]

# First trait: one boolean per leaf label.
label_to_trait_a = {
    'isolate1': True,
    'isolate2': False,
    'isolate3': True,
    'isolate4': False,
    'isolate5': True,
    'isolate6': False,
}

# Second trait: in this example it has the same values as the first trait.
label_to_trait_b = {
    'isolate1': True,
    'isolate2': False,
    'isolate3': True,
    'isolate4': False,
    'isolate5': True,
    'isolate6': False,
}

# Draw the tree; print_tree expects a ScoaryTree, so convert the nested list first.
print_tree(ScoaryTree.from_list(tree), label_to_trait_a, label_to_trait_b)
# Expected output -- each leaf label is prefixed with two digits, apparently
# one per trait (1 = True, 0 = False):
#
#       /-11_isolate1
#    /-|
#   |   \-00_isolate2
#   |
# --|      /-11_isolate3
#   |   /-|
#   |  |   \-00_isolate4
#    \-|
#      |   /-11_isolate5
#       \-|
#          \-00_isolate6

# Run the pairwise comparisons on the two traits; here the nested list is passed directly.
result = pick_single(
    tree,
    label_to_trait_a,
    label_to_trait_b,
    calc_pvals=True,
)
pprint(result)
# Expected output:
# {'best_fisher_p': 0.25,
#  'max_contrasting_pairs': 3,
#  'max_opposing_pairs': 0,
#  'max_supporting_pairs': 3,
#  'worst_pval': 0.25}
To compare one trait against many traits at once, use `pick`, which takes advantage of Numba optimizations.
import pandas as pd
from scoary import pick
# Phylogeny as a nested list: inner lists group isolates, strings are leaf labels.
tree = [
    ['isolate1', 'isolate2'],
    ['isolate3', 'isolate4'],
]

# Trait A (e.g. a phenotype): one boolean per leaf label.
label_to_trait_a = {
    'isolate1': True,
    'isolate2': False,
    'isolate3': False,
    'isolate4': True,
}

# Trait B candidates (e.g. presence/absence of genes):
# one row per gene, one column per isolate.
trait_b_df = pd.DataFrame(
    [
        [True, True, False, False],  # gene 1
        [True, False, True, False],  # gene 2
        [True, False, False, True],  # ...
        [False, True, True, False],
        [False, True, False, True],
        [False, True, False, True],
        [False, True, False, True],
        [False, True, False, True],
    ],
    columns=['isolate1', 'isolate2', 'isolate3', 'isolate4'],
)

# pick returns five results, unpacked here in the order in which they are printed below.
max_contr, max_suppo, max_oppos, best, worst = pick(
    tree=tree,
    label_to_trait_a=label_to_trait_a,
    trait_b_df=trait_b_df,
    calc_pvals=True,
)

print(f'{max_contr=}\n{max_suppo=}\n{max_oppos=}\n{best=}\n{worst=}')
# Expected output (eight entries per array, matching the eight rows of trait_b_df):
# max_contr=array([1, 2, 2, 2, 2, 2, 2, 2])
# max_suppo=array([1, 1, 2, 0, 1, 1, 1, 1])
# max_oppos=array([1, 1, 0, 2, 1, 1, 1, 1])
# best=array([1. , 1. , 0.5, 0.5, 1. , 1. , 1. , 1. ])
# worst=array([1. , 1. , 0.5, 0.5, 1. , 1. , 1. , 1. ])