Skip to content

Commit

Permalink
Merge pull request #5 from apcamargo/plotting
Browse files Browse the repository at this point in the history
Version 0.2.0
  • Loading branch information
apcamargo authored Mar 21, 2019
2 parents c666571 + db81180 commit 3761382
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 17 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@ tspex is a Python package for calculating tissue-specificity metrics from gene e
tspex features include:
- An easy-to-use object-oriented interface.
- Twelve different tissue-specificity metrics.
- A plotting function.
- Integration with pandas.
- Graphing functions.
- Support for Jupyter notebooks.

## Installation
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,13 @@

setup(
name='tspex',
version='0.1.1',
version='0.2.0',
packages=find_packages(),
license='GNU General Public License v3.0',
description='A Python package for calculating tissue-specificity metrics for gene expression.',
long_description=open('README.md').read(),
long_description_content_type='text/markdown',
install_requires=['matplotlib', 'numpy', 'pandas >= 0.23'],
install_requires=['matplotlib >= 2.2', 'numpy', 'pandas >= 0.23'],
python_requires= '>=3',
url='https://github.com/apcamargo/tspex',
keywords=['bioinformatics', 'gene expression', 'tissue-specificity', 'transcriptomics'],
Expand Down
72 changes: 58 additions & 14 deletions tspex/core/specificity_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
TissueSpecificity class of the tspex library.
"""

import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
Expand Down Expand Up @@ -85,11 +87,11 @@ def __init__(self, expression_data, method, log=False, **kwargs):
'js_specificity': js_specificity,
'js_specificity_dpm': js_specificity_dpm
}
self.expression_data = expression_data.astype('float')
if np.any(self.expression_data < 0):
raise ValueError('Negative expression values are not allowed.')
if log:
self.expression_data = expression_data.astype('float')
self.expression_data = self.expression_data.apply(lambda x: np.log(x+1))
else:
self.expression_data = expression_data.astype('float')
self._method = str(method)
self._transform = kwargs.pop('transform', True)
self._threshold = kwargs.pop('threshold', 0)
Expand All @@ -107,16 +109,15 @@ def _compute_tissue_specificity(self):
tissue_specificity = tissue_specificity.round(4)
return tissue_specificity


def tspex_plot(self, bins=30, size=(7,4), dpi=100):
def plot_histogram(self, bins=50, size=(7, 4), dpi=100):
"""
Plot a histogram of the tissue-specificity values. If the chosen metric
is one of 'zscore', 'spm' or 'js_specificity', the maximum row value is used
as a representative of the gene tissue-specificity.
Parameters
----------
bins : int, default 30
bins : int, default 50
Number of bins in the histogram.
size : tuple, default (7,4)
Size of the figure.
Expand All @@ -129,25 +130,68 @@ def tspex_plot(self, bins=30, size=(7,4), dpi=100):
data = self.tissue_specificity.max(axis=1).values
else:
data = self.tissue_specificity.values
fig, ax = plt.subplots(figsize=size, dpi=dpi)
fig, ax = plt.subplots(figsize=size, dpi=dpi, constrained_layout=True)
ax.hist(data, bins=bins, alpha=0.85, color='#262626')
ax.set_ylabel('Number of genes')
ax.set_xlabel(self._method)
ax.set_title('Histogram of {} values'.format(self._method), loc='left')


def to_file(self, filename):
def plot_heatmap(self, threshold, use_zscore=False, gene_names=True,
tissue_names=True, cmap='viridis', size=(7, 4), dpi=100):
"""
Write the tissue-specificity values into a tab-separated values (tsv)
file.
Plot a heatmap of the expression of genes with tissue-specificity over a
given threshold. The threshold should be in the [0,1] range. If the
chosen metric is one of 'zscore', 'spm' or 'js_specificity', the maximum
row value is used as a representative of the gene tissue-specificity.
Parameters
----------
filename : str
A string containing a path to a filename.
threshold : float
Tissue-specificity threshold.
use_zscore : bool, default False
Use expression z-score instead of the raw values.
gene_names : bool, default True
Show gene names in the y-axis.
tissue_names : bool, default True
Show tissue names in the x-axis.
cmap : str or matplotlib.colors.Colormap, default 'viridis'
Colormap to use in the heatmap.
size : tuple, default (7,4)
Size of the figure.
dpi : int, default 100
The resolution in dots per inch.
"""

self.tissue_specificity.to_csv(filename, sep='\t')
if self._method in ['zscore', 'spm', 'js_specificity']:
ts_data = self.tissue_specificity.max(axis=1)
else:
ts_data = self.tissue_specificity
expr_data = self.expression_data.loc[ts_data >= threshold]
if not len(expr_data):
warnings.warn('There is no gene with tissue-specificity value above the threshold.')
return None
if use_zscore:
expr_data = expr_data.apply(zscore, axis=1, result_type='broadcast', transform=False)
fig, ax = plt.subplots(figsize=size, dpi=dpi, constrained_layout=True)
im = ax.imshow(expr_data, cmap=cmap, aspect='auto')
ax.set_ylabel('Genes')
ax.set_xlabel('Tissues')
ax.set_yticks(np.arange(0, len(expr_data.index), 1))
ax.set_yticklabels(expr_data.index)
ax.set_xticks(np.arange(0, len(expr_data.columns), 1))
ax.set_xticklabels(expr_data.columns)
ax.tick_params(length=0)
ax.tick_params(axis='x', rotation=45)
if not gene_names:
ax.tick_params(labelleft=False)
if not tissue_names:
ax.tick_params(labelbottom=False)
cbar = fig.colorbar(im, ax=ax, pad=0.005, aspect=30)
if use_zscore:
cbar.ax.set_ylabel(ylabel='Expression (z-score)', rotation=-90, va='bottom')
else:
cbar.ax.set_ylabel(ylabel='Expression', rotation=-90, va='bottom')
cbar.ax.tick_params(length=0)

def _repr_html_(self):
if isinstance(self.tissue_specificity, pd.core.frame.DataFrame):
Expand Down

0 comments on commit 3761382

Please sign in to comment.