Usage as Python library

Python bindings to the pairwise comparisons algorithm, as described in Read (1995), Maddison (2000), and Brynildsrud (2016).

Simple pair picking

from pprint import pprint
from scoary import ScoaryTree, pick_single, print_tree

# The tree is given as nested lists: every inner list is a clade and every
# string is a leaf (isolate) label.
tree = [
    ['isolate1', 'isolate2'],
    [
        ['isolate3', 'isolate4'],
        ['isolate5', 'isolate6'],
    ],
]

# Trait A for each isolate.
label_to_trait_a = dict(
    isolate1=True,
    isolate2=False,
    isolate3=True,
    isolate4=False,
    isolate5=True,
    isolate6=False,
)

# Trait B for each isolate (in this example identical to trait A).
label_to_trait_b = dict(
    isolate1=True,
    isolate2=False,
    isolate3=True,
    isolate4=False,
    isolate5=True,
    isolate6=False,
)

# print_tree is handed a ScoaryTree, so convert the nested list first.
scoary_tree = ScoaryTree.from_list(tree)
print_tree(scoary_tree, label_to_trait_a, label_to_trait_b)
# Expected output (leaves where both traits are True are prefixed with 11,
# leaves where both are False with 00):
#       /-11_isolate1
#    /-|
#   |   \-00_isolate2
#   |
# --|      /-11_isolate3
#   |   /-|
#   |  |   \-00_isolate4
#    \-|
#      |   /-11_isolate5
#       \-|
#          \-00_isolate6

# pick_single is called with the plain nested-list tree and both trait
# mappings; calc_pvals=True additionally requests the p-values.
result = pick_single(
    tree,
    label_to_trait_a,
    label_to_trait_b,
    calc_pvals=True,
)
pprint(result)
# Expected output:
# {'best_fisher_p': 0.25,
#  'max_contrasting_pairs': 3,
#  'max_opposing_pairs': 0,
#  'max_supporting_pairs': 3,
#  'worst_pval': 0.25}

Parallel pair picking

`pick` compares one trait against many traits (one per row of a DataFrame) in a single call and takes advantage of Numba optimizations.

import pandas as pd
from scoary import pick

# Nested-list tree: two clades with two isolates each.
tree = [
    ['isolate1', 'isolate2'],
    ['isolate3', 'isolate4'],
]

# Trait A, e.g. a phenotype.
label_to_trait_a = dict(
    isolate1=True,
    isolate2=False,
    isolate3=False,
    isolate4=True,
)

# Trait B candidates, e.g. presence/absence of genes:
# one row per gene, one column per isolate.
isolates = ['isolate1', 'isolate2', 'isolate3', 'isolate4']
gene_rows = [
    [True, True, False, False],  # gene 1
    [True, False, True, False],  # gene 2
    [True, False, False, True],  # ...
    [False, True, True, False],
    [False, True, False, True],
    [False, True, False, True],
    [False, True, False, True],
    [False, True, False, True],
]
trait_b_df = pd.DataFrame(data=gene_rows, columns=isolates)

# pick returns five arrays with one entry per row of trait_b_df.
picked = pick(
    tree=tree,
    label_to_trait_a=label_to_trait_a,
    trait_b_df=trait_b_df,
    calc_pvals=True,
)
max_contr, max_suppo, max_oppos, best, worst = picked

print(f'{max_contr=}\n{max_suppo=}\n{max_oppos=}\n{best=}\n{worst=}')
# Expected output:
# max_contr=array([1, 2, 2, 2, 2, 2, 2, 2])
# max_suppo=array([1, 1, 2, 0, 1, 1, 1, 1])
# max_oppos=array([1, 1, 0, 2, 1, 1, 1, 1])
# best=array([1. , 1. , 0.5, 0.5, 1. , 1. , 1. , 1. ])
# worst=array([1. , 1. , 0.5, 0.5, 1. , 1. , 1. , 1. ])

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Usage as Python library

Simple pair picking

Parallel pair picking

Table of Contents

Clone this wiki locally