Skip to content

Commit

Permalink
Merge branch 'v1.7' into v1.8_pre
Browse files Browse the repository at this point in the history
  • Loading branch information
Matiss Ozols committed Dec 13, 2024
2 parents 1c72cf3 + 6726a96 commit a745839
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 34 deletions.
34 changes: 5 additions & 29 deletions bin/split_h5ad_per_donor.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,37 +154,13 @@ def split_h5ad_per_donor(vireo_donor_ids_tsv, filtered_matrix_h5, samplename,
else:
logging.info('Samples are not deconvoluted')
adata.obs['convoluted_samplename'] = samplename
method = 'Scrublet'
# here we add doublets from the Scrublet multiplet output
# scrublet = '222CC24C93C884F3-Card_Val11211773-scrublet.tsv.gz'
try:
scrublet_data = pd.read_csv(scrublet,compression='gzip',sep='\t')
except:
scrublet_data = pd.read_csv(scrublet,sep='\t')
doublets = scrublet_data[scrublet_data['scrublet__predicted_multiplet']]
doublets_nr = len(doublets)
donor_cell_nr = len(scrublet_data)-doublets_nr
method = 'After cellbender'
donor_cell_nr = len(adata.obs)
cells_per_donor_count_dic = {}
cells_per_donor_count_dic[1]={"donor_id":'donor','n_cells':donor_cell_nr}
cells_per_donor_count_dic[2]={'donor_id':'doblets','n_cells':doublets_nr}
adata.obs['donor_id']='donor'
# cells_per_donor_count_dic[2]={'donor_id':'doblets','n_cells':doublets_nr}
cells_per_donor_count = pd.DataFrame(cells_per_donor_count_dic).T
try:
idt = 'cell_barcode'
scrublet_data=scrublet_data.set_index('cell_barcode')
except:
idt = 'barcodes'
scrublet_data=scrublet_data.set_index('barcodes')
scrublet_data['donor_id']='donor'
scrublet_data.loc[list(doublets[idt]),'donor_id']='doublet'
scrublet_data['prob_doublet']=scrublet_data['scrublet__multiplet_scores']
for new_cell_annotation in ['donor_id','prob_max','prob_doublet','n_vars','best_singlet','best_doublet']:
try:
adata.obs[new_cell_annotation] = scrublet_data[new_cell_annotation]
except:
adata.obs[new_cell_annotation] = 'nan'




# plot n cells per deconvoluted Vireo donor:
if plot_n_cells_per_vireo_donor:
Expand All @@ -198,7 +174,7 @@ def split_h5ad_per_donor(vireo_donor_ids_tsv, filtered_matrix_h5, samplename,
axis_text_y=plt9.element_text(colour="black"))
gplt = gplt + plt9.geom_bar(stat='identity', position='dodge')
gplt = gplt + plt9.geom_text(plt9.aes(label='n_cells'))
gplt = gplt + plt9.labels.ggtitle(f'{method} deconvolution\nnumber of cells per deconvoluted donor\nsample: ' + samplename)
gplt = gplt + plt9.labels.ggtitle(f'{method} \nnumber of cells per deconvoluted donor\nsample: ' + samplename)
gplt = gplt + plt9.labels.xlab('deconvoluted donor')
gplt = gplt + plt9.labels.ylab(f'Number of cells assigned by {method}')

Expand Down
11 changes: 6 additions & 5 deletions bin/totalVI.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@

# import single-cell data and CITE-seq data
# SLEmap = sc.read('adata-normalized.h5ad')
SLEmap = sc.read(options.h5ad_file)
SLEmap = sc.read(options.h5ad_file, backed ='r')

all_cite_files = glob.glob("./*/*.matrix.csv")
CITE = pd.DataFrame()
Expand All @@ -59,12 +59,13 @@
SLEmap.obsm['protein_expression'] = CITE_2

# keep only cells passing QC and highly variable genes
SLEmap = SLEmap[SLEmap.obs["cell_passes_qc"],:]
SLEmap = SLEmap[SLEmap.obs["cell_passes_hard_filters"],:]
SLEmap = SLEmap[:,SLEmap.var["highly_variable"]]
SLEmap = SLEmap[
(SLEmap.obs["cell_passes_qc"] & SLEmap.obs["cell_passes_hard_filters"]),
SLEmap.var["highly_variable"]
]

#run totalVI
SLEmap = SLEmap.copy()
SLEmap = SLEmap.to_memory().copy()
scvi.model.TOTALVI.setup_anndata(SLEmap, protein_expression_obsm_key="protein_expression",batch_key="experiment_id")
model = scvi.model.TOTALVI(SLEmap,latent_distribution="normal",n_layers_decoder=2)

Expand Down

0 comments on commit a745839

Please sign in to comment.