-
Notifications
You must be signed in to change notification settings - Fork 0
/
GseaPy_analysis.py
99 lines (70 loc) · 3.29 KB
/
GseaPy_analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
## Minimal test of GSEAPY
## Dev version of GSEAPY, run in Python3
## $ conda install -c bioconda gseapy
## conda env "gseapy_test --> /home/sergio/miniconda3/envs/gseapy_test"
import gseapy as gp
import numpy as np
from os.path import join
import pandas as pd
import matplotlib.pyplot as plt
tbl = pd.read_table(join('tracks', 'TLX3vsRAG-results_genes.txt'), index_col=0)
tbl = tbl[(tbl.padj < 0.05)].dropna()
names = pd.read_table("tracks/annot_tracks/references/mm9/mm9_EnsemblTransc_GeneNames.txt",
index_col=0,
header=0,
names=['GeneID', 'TransID', 'Gene_name'])
names = names[~names.index.duplicated(keep='first')]
names = names.drop('TransID', axis=1)
#names = names.reindex(tbl.index)
names = names.loc[tbl.index]
cols = ['TLX3.1_1', 'TLX3.1_5', 'TLX3.1_P', 'R2.RAG1W.RAG1', 'RAGS.RAGZ', 'RAGZ']
classes = ['TLX3','TLX3','TLX3', 'RAG','RAG','RAG']
assert names.index.all() == tbl.index.all()
tbn = pd.concat([names,tbl[cols]], axis=1)
#tbn = tbn.drop(['baseMean', 'log2FoldChange', 'lfcSE', 'stat', 'pvalue', 'padj'], axis=1)
tbn['Gene_name'] = tbn['Gene_name'].str.upper()
tbn = tbn.reset_index(drop=True)
## ==== GSEAPY
# --- get gene lists
#~ list_gs = gp.get_library_name()
gs_dic = { 'Hallmark': 'tracks/GSEA_gene_sets/h.all.v6.0.symbols.gmt',
'BioCarta_2013': 'BioCarta_2013',
'BioCarta_2015': 'BioCarta_2015',
'BioCarta_2016': 'BioCarta_2016',
'KEGG_2013': 'KEGG_2013',
'KEGG_2015': 'KEGG_2015',
'KEGG_2016': 'KEGG_2016',
'Reactome_2013': 'Reactome_2013',
'Reactome_2015': 'Reactome_2015',
'Reactome_2016': 'Reactome_2016',
'Canonical_pathways': 'tracks/GSEA_gene_sets/c2.cp.v6.0.symbols.gmt',
'Imunno':'tracks/GSEA_gene_sets/c7.all.v6.0.symbols.gmt',
'Oncogenic_signatures': 'MSigDB_Oncogenic_Signatures',
'Computational':'MSigDB_Computational',
'NCI-60_Cancer_Cell_Lines': 'NCI-60_Cancer_Cell_Lines',
'Transcription_factor_targets': 'tracks/GSEA_gene_sets/c3.tft.v6.0.symbols.gmt',
'IMMPort': 'gene_lists/IMMPort/IMMPort_test.gmt',
'Pierre_sets': 'tracks/GSEA_gene_sets/Pierre_gene_sets.gmt',
'Pierre_sets_TLX_enh_TSS': 'tracks/GSEA_gene_sets/Pierre_gene_sets_plus.gmt',
'Pierre_sets_v2': 'tracks/GSEA_gene_sets/Pierre_gene_sets_v2.gmt'}
#~ for g_set in gs_dic.keys():
for g_set in ['Pierre_sets_v2']:
out_dir = 'GSEA/TLX3vsRAG_' + g_set + '_classic_std'
gs_res = gp.gsea(data=tbn,
gene_sets = gs_dic[g_set],
weighted_score_type = 0,
#~ method = 'ratio_of_classes',
min_size = 10,
max_size = 10000,
graph_num = 150,
permutation_type = 'gene_set',
outdir=out_dir,
cls = classes)
# plotting
gsea_results= gs_res.res2d
with plt.style.context('ggplot'):
gsea_results = gsea_results.reset_index()
gsea_results.head(40).plot.barh(y='fdr',x='Term', figsize=(18, 6), fontsize=10)
plt.gca().invert_yaxis()
plt.savefig(out_dir+'/'+'TLX3vsRAG_' +g_set+'.pdf', format="pdf")
plt.show()