-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathqc6_spikein_count_variability_rsem.py
91 lines (69 loc) · 1.97 KB
/
qc6_spikein_count_variability_rsem.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
"""
Analysis!
Compute histograms/make scatter plots of the number of reads for each spike in for each cell
"""
"""
Import python packages
"""
import HTSeq
import collections
import itertools
import os
import subprocess
import collections
import datetime
import yaml
import fnmatch
import shlex
import numpy
import scipy
import scipy.io as sio
import pyensembl
import h5py
import pandas as pd
import numpy as np
import matplotlib
import cPickle as pickle
matplotlib.use("Agg")
import matplotlib.pyplot as plt
"""
Load all the cells
"""
matplotlib.style.use('ggplot')
direc = '/scratch/PI/mcovert/dvanva/sequencing/'
all_cell_file = 'all_cells_rsem.pkl'
all_cells = pickle.load(open(os.path.join(direc,all_cell_file)))
spike1_list = []
spike4_list = []
spike7_list = []
spikeins = ['Spike1', 'Spike4', 'Spike7']
for cell in all_cells:
spike1_list += [cell.spikeins_rsem.loc['Spike_1']['est_counts']]
spike4_list += [cell.spikeins_rsem.loc['Spike_4']['est_counts']]
spike7_list += [cell.spikeins_rsem.loc['Spike_7']['est_counts']]
spike1_list = np.array(spike1_list)/2
spike4_list = np.array(spike4_list)/2
spike7_list = np.array(spike7_list)/2
fig = plt.figure()
ax = fig.add_subplot(111)
ax.patch.set_facecolor('white')
spk1 = fig.add_subplot(311)
spk4 = fig.add_subplot(312)
spk7 = fig.add_subplot(313)
ax.spines['top'].set_color('none')
ax.spines['bottom'].set_color('none')
ax.spines['left'].set_color('none')
ax.spines['right'].set_color('none')
ax.tick_params(labelcolor = 'w', top = 'off', bottom = 'off', left = 'off', right = 'off')
spk1.hist(spike1_list, bins = 40, label = 'Spike 1 reads')
spk1.legend()
spk4.hist(spike4_list, bins = 40, label = 'Spike 4 reads', color = 'red')
spk4.legend()
spk7.hist(spike7_list, bins = 40, label = 'Spike 7 reads', color = 'green')
spk7.legend()
ax.set_xlabel('Estimated counts')
ax.set_ylabel('Number of cells')
ax.set_title('Estimated counts for RNA spike ins')
plt.legend()
fig.tight_layout()
plt.savefig("plots/qc6_spikein_histogram_rsem.pdf")