-
Notifications
You must be signed in to change notification settings - Fork 0
/
GOEnrichment.py
executable file
·57 lines (45 loc) · 1.53 KB
/
GOEnrichment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# /usr/bin/env python
# -*- coding: utf-8 -*-
"""
Author: Raymond Lee
Date: January 29, 2016
A script to implement a hypergeometric test for WBbt anatomy enrichment
based on gene expression pattern annotations.
Needs:
A gene/tissue dictionary
A list of gene names to query
"""
import os
import sys, getopt
os.environ['MPLCONFIGDIR']='/tmp'
sys.path.append('../tissue_enrichment_tool_hypergeometric_test') \
# location of the test library
import matplotlib
matplotlib.use('Agg')
import pandas as pd
from scipy import stats
from hypergeometricTests \
import implement_hypergmt_enrichment_tool as hgt
# for arg in sys.argv:
# print arg
q_threshold=0.1
# q_threshold= 0.018
# in the case of anatomical expression patterns, \
# there are ~5000 genes total in the dictionary whereas a tissue has \
# minimally 100 genes. Thus, if the input contains a singleton gene, \
# the p can be as low as 0.02 for tissues that does not express that \
# gene at all. It makes no sense to say that the tissue is enriched.
path= "./"
os.chdir(path)
if len(sys.argv) == 2:
gene_file = sys.argv[1]
# q_threashold = sys.argv[2]
else:
exit("Must specify a gene list. Bye")
genes1= pd.read_csv(gene_file) #this file should be only wormbase ID's!
print "using file ", gene_file
#extract the first column of the file as a list of strings for analysis
gene_list1= genes1[genes1.columns[0]].values
#read in the dictionary
tissue_df= pd.read_csv("/home/raymond/local/src/git/EnrichmentTools/input/dictionary_go.csv")
hgt(gene_list=gene_list1, tissue_df=tissue_df)