-
Notifications
You must be signed in to change notification settings - Fork 0
/
_data.py
67 lines (54 loc) · 1.6 KB
/
_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
#%%
# When this file is executed directly (e.g. as interactive cells) rather than
# imported, set __package__ so that the relative imports below resolve
# against the 'combat_atlas' package.
if __name__ == '__main__':
    __package__ = 'combat_atlas'
import tarfile
import pandas as pd
import xarray as xa
from .common.caching import compose, lazy
from ._helpers import config
#%%
class _data:
    """Accessors for the data files stored under ``config.cache/'download'``.

    Every accessor is wrapped with ``compose(property, lazy)``.
    NOTE(review): presumably this yields a property whose value is computed
    once and then cached -- confirm against ``common.caching``.
    """

    @compose(property, lazy)
    def clinvar(self):
        """Clinical variables table, indexed by ``row_number``.

        Reads the tab-separated CLINVAR file and converts the resulting
        DataFrame to an xarray object via ``DataFrame.to_xarray()``.
        """
        path = config.cache/'download'/'CBD-KEY-CLINVAR'/'COMBAT_CLINVAR_for_processed.txt'
        table = pd.read_csv(path, sep='\t')
        return table.set_index('row_number').to_xarray()

    @compose(property, lazy)
    def rnaseq_wb(self):
        """Raw count table as a ``DataArray`` named ``'rnaseq'``.

        The DataFrame's rows become the ``feature`` dimension and its
        columns the ``sample`` dimension.
        """
        path = config.cache/'download'/'CBD-KEY-RNASEQ-WB'/'Raw_count_data_143_60683.txt'
        frame = pd.read_csv(path, sep='\t')
        axes = [('feature', frame.index), ('sample', frame.columns)]
        return xa.DataArray(frame, coords=axes, name='rnaseq')

    @compose(property, lazy)
    def rnaseq_wb_logcpm(self):
        """Logcpm table as a ``DataArray`` with dims ``feature`` x ``sample``.

        Note that the array is named ``'rnaseq'``, the same name used by
        ``rnaseq_wb``.
        """
        path = config.cache/'download'/'CBD-KEY-RNASEQ-WB'/'Logcpm_143_23063.txt'
        frame = pd.read_csv(path, sep='\t')
        axes = [('feature', frame.index), ('sample', frame.columns)]
        return xa.DataArray(frame, coords=axes, name='rnaseq')

    @compose(property, lazy)
    def citeseq1(self):
        """CITE-seq data loaded from the ``.h5ad`` file with ``scanpy.read``."""
        # scanpy is imported here rather than at module level, so it is only
        # required when this accessor is actually used.
        import scanpy

        path = config.cache/'download'/'COMBAT-CITESeq-DATA.h5ad'
        return scanpy.read(path)
def make_cache():
    """Placeholder: currently performs no work and returns ``None``."""
    return None
# Module-level instance through which the accessors defined on `_data`
# (clinvar, rnaseq_wb, rnaseq_wb_logcpm, citeseq1) are reached.
data = _data()
#%%
if __name__ == '__main__':
    # Scratch cells, only run when the file is executed directly.
    self = data

    # %%
    # Clinical table: keep the rows that have an RNASeq_sample_ID, then make
    # that ID the dimension and call it 'sample'.
    clin = self.clinvar
    has_rnaseq_id = ~clin.RNASeq_sample_ID.isnull()
    z = (
        clin
        .sel(row_number=has_rnaseq_id)
        .swap_dims(row_number='RNASeq_sample_ID')
        .rename(RNASeq_sample_ID='sample')
    )

    # Combine the raw counts with the clinical variables using an inner join.
    x2 = xa.merge([self.rnaseq_wb, z], join='inner')
# %%