Skip to content

Commit

Permalink
Merge pull request #7 from apcamargo/individualized-tsi
Browse files Browse the repository at this point in the history
Version 0.5.0
  • Loading branch information
apcamargo authored Jun 2, 2019
2 parents 507c27c + 5ff645d commit 836c677
Show file tree
Hide file tree
Showing 6 changed files with 111 additions and 95 deletions.
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,9 @@ positional arguments:
input_file Expression matrix file in the TSV or CSV formats.
output_file Output TSV file containing tissue-specificity values.
method Tissue-specificity metric. Allowed values are:
"counts", "tsi", "tau", "gini", "simpson",
"shannon_specificity", "roku_specificity", "zscore",
"spm", "spm_dpm", "js_specificity",
"counts", "tau", "gini", "simpson",
"shannon_specificity", "roku_specificity", "tsi",
"zscore", "spm", "spm_dpm", "js_specificity",
"js_specificity_dpm".
optional arguments:
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

setup(
name='tspex',
version='0.4.0',
version='0.5.0',
packages=find_packages(),
license='GNU General Public License v3.0',
description='A Python package for calculating tissue-specificity metrics for gene expression.',
Expand Down
5 changes: 2 additions & 3 deletions tspex/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,8 @@ def tspex_cli(input_file, output_file, method, log, disable_transformation, thre


def main():
method_choices = ['counts', 'tsi', 'tau', 'gini', 'simpson', 'shannon_specificity',
'roku_specificity', 'zscore', 'spm', 'spm_dpm', 'js_specificity',
'js_specificity_dpm']
method_choices = ['counts', 'tau', 'gini', 'simpson', 'shannon_specificity', 'roku_specificity',
'tsi', 'zscore', 'spm', 'spm_dpm', 'js_specificity', 'js_specificity_dpm']
parser = argparse.ArgumentParser(
description='Compute gene tissue-specificity from an expression matrix and save the output.',
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
Expand Down
23 changes: 12 additions & 11 deletions tspex/core/specificity_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,9 @@ class TissueSpecificity:
corresponding to genes and columns to tissues/conditions.
method : str
A string representing which tissue-expression metric should be
calculated. One of: 'counts', 'tsi', 'tau', 'gini', 'simpson',
'shannon_specificity', 'roku_specificity', 'zscore', 'spm', 'spm_dpm',
'js_specificity', 'js_specificity_dpm'.
calculated. One of: 'counts', 'tau', 'gini', 'simpson',
'shannon_specificity', 'roku_specificity', 'tsi', 'zscore', 'spm',
'spm_dpm', 'js_specificity', 'js_specificity_dpm'.
log : bool, default False
Log-transform the expression matrix before computing tissue-specificity
by taking the base-2 logarithm of one plus the expression values. By
Expand Down Expand Up @@ -76,12 +76,12 @@ class TissueSpecificity:
def __init__(self, expression_data, method, log=False, **kwargs):
self._function_dictionary = {
'counts': counts,
'tsi': tsi,
'tau': tau,
'gini': gini,
'simpson': simpson,
'shannon_specificity': shannon_specificity,
'roku_specificity': roku_specificity,
'tsi': tsi,
'zscore': zscore,
'spm': spm,
'spm_dpm': spm_dpm,
Expand All @@ -105,7 +105,7 @@ def __init__(self, expression_data, method, log=False, **kwargs):

def _compute_tissue_specificity(self):
func = self._function_dictionary[self._method]
if self._method in ['zscore', 'spm', 'js_specificity']:
if self._method in ['tsi', 'zscore', 'spm', 'js_specificity']:
tissue_specificity = self.expression_data.apply(func, axis=1, result_type='broadcast',
transform=self._transform)
else:
Expand All @@ -118,8 +118,8 @@ def _compute_tissue_specificity(self):
def plot_histogram(self, bins=30, size=(6, 4), dpi=75):
"""
Plot a histogram of the tissue-specificity values. If the chosen metric
is one of 'zscore', 'spm' or 'js_specificity', the maximum row value is used
as a representative of the gene tissue-specificity.
is one of 'tsi', 'zscore', 'spm' or 'js_specificity', the maximum row
value is used as a representative of the gene tissue-specificity.
Parameters
----------
Expand All @@ -132,7 +132,7 @@ def plot_histogram(self, bins=30, size=(6, 4), dpi=75):
"""

with plt.style.context('seaborn-whitegrid'):
if self._method in ['zscore', 'spm', 'js_specificity']:
if self._method in ['tsi', 'zscore', 'spm', 'js_specificity']:
data = self.tissue_specificity.max(axis=1).values
else:
data = self.tissue_specificity.values
Expand All @@ -147,8 +147,9 @@ def plot_heatmap(self, threshold, sort_genes=False, use_zscore=False, gene_names
"""
Plot a heatmap of the expression of genes with tissue-specificity over a
given threshold. The threshold should be in the [0,1] range. If the
chosen metric is one of 'zscore', 'spm' or 'js_specificity', the maximum
row value is used as a representative of the gene tissue-specificity.
chosen metric is one of 'tsi', 'zscore', 'spm' or 'js_specificity', the
maximum row value is used as a representative of the gene
tissue-specificity.
Parameters
----------
Expand All @@ -170,7 +171,7 @@ def plot_heatmap(self, threshold, sort_genes=False, use_zscore=False, gene_names
The resolution in dots per inch.
"""

if self._method in ['zscore', 'spm', 'js_specificity']:
if self._method in ['tsi', 'zscore', 'spm', 'js_specificity']:
ts_data = self.tissue_specificity.max(axis=1)
else:
ts_data = self.tissue_specificity
Expand Down
60 changes: 30 additions & 30 deletions tspex/core/specificity_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,36 +82,6 @@ def counts(vector, **kwargs):
return cts_transformed


def tsi(vector, **kwargs):
    """
    Compute the tissue-specificity index (TSI) of a gene.

    The index is the largest expression value of the vector divided by the
    sum of the expression values across all tissues.

    Parameters
    ----------
    vector : numpy.array
        Gene expression vector. Each value corresponds to the gene expression
        in a given tissue.
    **kwargs
        Accepted and ignored by this function.

    Returns
    -------
    float
        Single summary of the tissue-specificity. Ranges from 0 (ubiquitous
        expression) to 1 (specific expression). A vector without any nonzero
        value yields 0.0.

    References
    ----------
    .. [1] Julien, Philippe, et al. "Mechanisms and evolutionary patterns of
           mammalian and avian dosage compensation." PLoS biology 10.5 (2012)
    """

    # Only divide when at least one entry is nonzero; an all-zero vector
    # falls through to the fixed 0.0 result below.
    if np.any(vector):
        peak_expression = max(vector)
        total_expression = np.sum(vector)
        return peak_expression / total_expression
    return 0.0


def tau(vector, **kwargs):
"""
Quantify tissue-specificity as the Tau index [1].
Expand Down Expand Up @@ -307,6 +277,36 @@ def roku_specificity(vector, **kwargs):
return rs


def tsi(vector, **kwargs):
    """
    Quantify tissue-specificity as the ratio between each value of the
    expression vector and the sum of the expression values in all tissues.

    Parameters
    ----------
    vector : numpy.array
        Gene expression vector. Each value corresponds to the gene expression
        in a given tissue.
    **kwargs
        Accepted and ignored by this function.

    Returns
    -------
    numpy.array
        One value per tissue: the fraction of the gene's total expression
        found in that tissue. For non-negative expression values it ranges
        from 0 (no expression in the tissue) to 1 (expression restricted to
        the tissue). If the vector has no nonzero value, an array of zeros
        with the same shape as the input is returned, so the result always
        has one entry per tissue.

    References
    ----------
    .. [1] Julien, Philippe, et al. "Mechanisms and evolutionary patterns of
           mammalian and avian dosage compensation." PLoS biology 10.5 (2012)
    """

    if not np.any(vector):
        # Nothing to apportion: avoid the 0/0 division and still return one
        # value per tissue rather than a bare scalar.
        return np.zeros(np.shape(vector), dtype=float)
    tissue_specificity_index = vector / np.sum(vector)
    return tissue_specificity_index


def zscore(vector, **kwargs):
"""
Quantify tissue-specificity as z-scores.
Expand Down
Loading

0 comments on commit 836c677

Please sign in to comment.