-
Notifications
You must be signed in to change notification settings - Fork 1
/
qc8_kal_vs_rsem_transcriptome.py
92 lines (72 loc) · 2.06 KB
/
qc8_kal_vs_rsem_transcriptome.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
"""
Analysis!
Compute histograms/make scatter plots of the number of reads for each spike in for each cell
"""
"""
Import python packages
"""
import HTSeq
import collections
import itertools
import os
import subprocess
import collections
import datetime
import yaml
import fnmatch
import shlex
import numpy
import scipy
import scipy.io as sio
import pyensembl
import h5py
import pandas as pd
import numpy as np
import matplotlib
import cPickle as pickle
from scipy import stats
matplotlib.use("Agg")
import matplotlib.pyplot as plt
"""
Load all the cells
"""
matplotlib.style.use('ggplot')
direc = '/scratch/PI/mcovert/dvanva/sequencing/'
all_cell_file = 'all_cells_rsem.pkl'
all_cells = pickle.load(open(os.path.join(direc,all_cell_file)))
kal_list = []
rsem_list = []
index_rsem = all_cells[0].fpkm_rsem.index
index_kal = all_cells[0].fpkm.index
joint_index = list(set(index_rsem).intersection(set(index_kal)))
joint_index = joint_index[0:500]
print joint_index
cell = all_cells[0]
print list(cell.fpkm_rsem.loc[joint_index] +1)
counter = 0
for cell in all_cells:
print counter
counter += 1
rsem_list += list(cell.fpkm_rsem.loc[joint_index] +1)
kal_list += list(cell.fpkm.loc[joint_index] + 1)
rsem_list = np.array(rsem_list)
kal_list = np.array(kal_list)
slope, intercept, r_value, p_value, std_err = stats.linregress(kal_list, rsem_list)
x = np.linspace(0,1e5,1000)
y = slope *x + intercept
fig = plt.figure(figsize = (6,6))
ax = fig.add_subplot(111)
# ax.scatter(rsem_list, spike1_list_rsem, color = 'b', s = .5, alpha = 1, label = 'Spike 1')
ax.set_xlim([.8,1e5])
ax.set_ylim([.8,1e5])
ax.scatter(kal_list, rsem_list, color = 'b', s = .5, alpha = 1)
ax.plot(x,y,'r')
ax.text(.2,.9, 'r = ' + str(r_value), ha='center', va='center', transform=ax.transAxes)
ax.set_xscale('log')
ax.set_yscale('log')
ax.set_xlabel('Kallisto fpkm + 1', fontsize = 12)
ax.set_ylabel('STAR/RSEM fpkm + 1', fontsize = 12)
ax.set_title('Kallisto vs STAR/RSEM transcriptome comparison', y= 1.05, fontsize = 14)
# plt.legend(loc = 4)
fig.tight_layout()
plt.savefig("plots/qc8_kalvsstar_transcriptome.png")