-
Notifications
You must be signed in to change notification settings - Fork 1
/
qc5_mapped_hist_rsem.py
59 lines (49 loc) · 1.22 KB
/
qc5_mapped_hist_rsem.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
"""
Analysis!
Compute histograms/make scatter plots of the number of mapped and unmapped reads for each cell
"""
"""
Import python packages
"""
import HTSeq
import collections
import itertools
import os
import subprocess
import collections
import datetime
import yaml
import fnmatch
import shlex
import numpy
import scipy
import scipy.io as sio
import pyensembl
import h5py
import pandas as pd
import numpy as np
import matplotlib
matplotlib.use("Agg")
import cPickle as pickle
import matplotlib.pyplot as plt
"""
Load all the cells
"""
matplotlib.style.use('ggplot')
direc = '/scratch/PI/mcovert/dvanva/sequencing/'
all_cell_file = 'all_cells_rsem.pkl'
all_cells = pickle.load(open(os.path.join(direc,all_cell_file)))
unmapped_list = []
mapped_list = []
for cell in all_cells:
mapped_list += [cell.num_mapped_rsem]
unmapped_list += [cell.num_unmapped_rsem]
mapped = np.array(mapped_list)/2
unmapped = np.array(unmapped_list)/2
mp = plt.hist(mapped, bins = 40, label = 'Mapped reads')
unmp = plt.hist(unmapped, bins = 40, label = 'Unmapped reads')
plt.xlabel('Number of reads')
plt.ylabel('Number of cells')
plt.title('Number of reads mapping to the transcriptome')
plt.legend()
plt.savefig("plots/qc5_num_mapped_histogram_rsem.pdf")