Skip to content

Commit

Permalink
merge
Browse files Browse the repository at this point in the history
  • Loading branch information
Matiss Ozols committed Nov 30, 2023
2 parents 5b61ac1 + edca4bf commit 9061c80
Show file tree
Hide file tree
Showing 31 changed files with 4,315 additions and 1,279 deletions.
2 changes: 1 addition & 1 deletion assets/deploy_scripts/bsub.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,5 @@ if ["$varname" = ''];
fi
sample="$RUN_ID"
echo -e "\n Submitting yascp (https://github.com/wtsi-hgi/yascp) with input file $INPUT_FILE"
bsub -R'select[mem>8000] rusage[mem=8000]' -J $sample -n 1 -M 8000 -o $sample.o -e $sample.e -q long bash /software/hgi/pipelines/yascp_versions/yascp_v1.2/assets/deploy_scripts/nohup_start_nextflow_lsf.sh $INPUT_FILE
bsub -R'select[mem>8000] rusage[mem=8000]' -J $sample -n 1 -M 8000 -o $sample.o -e $sample.e -q long bash /software/hgi/pipelines/yascp_versions/yascp_v1.3__work/assets/deploy_scripts/nohup_start_nextflow_lsf.sh $INPUT_FILE
echo "Submitted job can be killed with: bkill -J $sample"
2 changes: 1 addition & 1 deletion assets/deploy_scripts/bsub__removeWork.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,5 @@ INPUT_FILE=$1
export RUN_ID="${PWD##*/}"
sample="$RUN_ID.yascp"
echo "Cleaning the work directory (https://github.com/wtsi-hgi/yascp) with input file $INPUT_FILE by using '-entry WORK_DIR_REMOVAL --remove_work_dir' "
bsub -R'select[mem>4000] rusage[mem=4000]' -J $sample -n 1 -M 4000 -o $sample.o -e $sample.e -q long bash /software/hgi/pipelines/yascp_versions/yascp_v1.2/assets/deploy_scripts/nohup_start_nextflow_lsf__removeWork.sh $INPUT_FILE
bsub -R'select[mem>4000] rusage[mem=4000]' -J $sample -n 1 -M 4000 -o $sample.o -e $sample.e -q long bash /software/hgi/pipelines/yascp_versions/yascp_v1.3__work/assets/deploy_scripts/nohup_start_nextflow_lsf__removeWork.sh $INPUT_FILE
echo "Submitted job can be killed with: bkill -J $sample"
2 changes: 1 addition & 1 deletion assets/deploy_scripts/bsub_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,5 @@ fi

sample="$RUN_ID.yascp"
echo -e "\nSubmitting yascp (https://github.com/wtsi-hgi/yascp) in test mode withsample OneK1k dataset"
bsub -R'select[mem>4000] rusage[mem=4000]' -J yascp_test -n 1 -M 4000 -o yascp_test.o -e yascp_test.e -q normal bash /software/hgi/pipelines/yascp_versions/yascp_v1.2/assets/deploy_scripts/nohup_start_nextflow_lsf_test.sh
bsub -R'select[mem>4000] rusage[mem=4000]' -J yascp_test -n 1 -M 4000 -o yascp_test.o -e yascp_test.e -q normal bash /software/hgi/pipelines/yascp_versions/yascp_v1.3__work/assets/deploy_scripts/nohup_start_nextflow_lsf_test.sh
echo "Submitted job can be killed with: bkill -J yascp_test"
2 changes: 1 addition & 1 deletion assets/deploy_scripts/bsub_test_celltypes.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,5 @@ fi

sample="$RUN_ID.yascp"
echo -e "\nSubmitting yascp (https://github.com/wtsi-hgi/yascp) in JUST_CELLTYPES mode with input file $INPUT_FILE"
bsub -R'select[mem>4000] rusage[mem=4000]' -J yascp_celltypes -n 1 -M 4000 -o yascp_celltypes.o -e yascp_celltypes.e -q normal bash /software/hgi/pipelines/yascp_versions/yascp_v1.2/assets/deploy_scripts/nohup_start_nextflow_lsf_celltypes.sh $INPUT_FILE
bsub -R'select[mem>4000] rusage[mem=4000]' -J yascp_celltypes -n 1 -M 4000 -o yascp_celltypes.o -e yascp_celltypes.e -q normal bash /software/hgi/pipelines/yascp_versions/yascp_v1.3__work/assets/deploy_scripts/nohup_start_nextflow_lsf_celltypes.sh $INPUT_FILE
echo "Submitted job can be killed with: bkill -J yascp_celltypes"
29 changes: 29 additions & 0 deletions assets/deploy_scripts/bsub_test_recluster.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#!/usr/bin/env bash
# Submit the yascp pipeline (https://github.com/wtsi-hgi/yascp) to LSF in
# JUST_RECLUSTER mode.
#
# Usage: bsub_test_recluster.sh <input_file>
#   <input_file>: nextflow '-c' config passed through to the nohup launcher.
CWD1="$PWD"
parentdir="$(dirname "$CWD1")"
INPUT_FILE=$1
# RUN_ID is the name of the directory the script is launched from.
export RUN_ID="${PWD##*/}"

# export SINGULARITY_CACHEDIR='/software/hgi/containers/yascp'

export NXF_OPTS="-Xms5G -Xmx5G"
export SINGULARITY_TMPDIR=$PWD/work/tmp
export TEMP=$PWD/work/tmp
export TMP_DIR=$PWD/work/tmp

echo press ENTER to NOT fetch containers, otherwise provide writable path:
read varname

# BUG FIX: the original `if ["$varname" = ''];` is missing the mandatory
# spaces around '[' and ']' — with non-empty input it ran a nonexistent
# command (e.g. `[foo`), printed "command not found", and only fell into
# the else branch by accident. '[' is a command and needs space-separated
# arguments.
if [ "$varname" = '' ];
then
    # No path given: use the shared read-only container cache.
    export NXF_SINGULARITY_CACHEDIR='/software/hgi/containers/yascp'
    export SINGULARITY_DISABLE_CACHE=0
else
    echo Yascp Will fetch the containers and place them in $varname
    export NXF_SINGULARITY_CACHEDIR=$varname
fi

sample="$RUN_ID.yascp"
echo -e "\nSubmitting yascp (https://github.com/wtsi-hgi/yascp) in JUST_RECLUSTER mode with input file $INPUT_FILE"
# Submit the background launcher to LSF; the nextflow run itself happens
# inside nohup_start_nextflow_lsf_recluster.sh on the execution host.
bsub -R'select[mem>4000] rusage[mem=4000]' -J yascp_cluster -n 1 -M 4000 -o yascp_cluster.o -e yascp_cluster.e -q normal bash /software/hgi/pipelines/yascp_versions/yascp_v1.3__work/assets/deploy_scripts/nohup_start_nextflow_lsf_recluster.sh $INPUT_FILE
echo "Submitted job can be killed with: bkill -J yascp_cluster"
138 changes: 138 additions & 0 deletions assets/deploy_scripts/input_setups/recluster_profile.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
// Nextflow parameter profile for the yascp JUST_RECLUSTER entry point.
// Passed to `nextflow run` via '-c' by nohup_start_nextflow_lsf_recluster.sh;
// a user-supplied '-c' config follows it on the command line and can
// override any of these values.
params {

lisi{
run_process=true
}
replace_genotype_ids=false
write_h5=true
cluster_validate_resolution_keras = true
// run_celltype_assignment = true
project_name = 'T_Cell_Bio_Response'
filter_outliers = false
extra_sample_metadata =""
// Chained assignment: both 'output_dir' and 'outdir' point at the same path.
output_dir = outdir= "${launchDir}/recluster_resolutions"
cellex_cluster_markers=true
cluster_markers = false
normalise_andata = false
skip_handover = true
// output_dir = outdir= "${launchDir}/results"
// run_celltype_assignment=true
// NOTE(review): key name 'split_ad_per_bach' looks like a typo for
// 'split_ad_per_batch', but it is what the pipeline reads — do not rename
// here without changing the consumer.
split_ad_per_bach=true //if not splitting the celltype assignment will be run on full tranche
// input_data_table = "$outdir/handover/Summary_plots/$RUN_ID/Fetch Pipeline/Input/input_table.tsv"
// cellbender_location="${output_dir}/nf-preprocessing/cellbender" //!!!!! if cellbender is run already then can skip this by selecting input = 'existing_cellbender' instead input = 'cellbender'
// existing_cellsnp="${output_dir}/cellsnp"
// NOTE(review): the two paths below are hard-coded to one user's lustre
// scratch area — they will not exist for other runs; confirm before reuse.
cellbender_location="/lustre/scratch123/hgi/teams/hgi/mo11/tmp_projects/harriet/qc/results_11_09_2023/nf-preprocessing/cellbender" //!!!!! if cellbender is run already then can skip this by selecting input = 'existing_cellbender' instead input = 'cellbender'
existing_cellsnp="/lustre/scratch123/hgi/teams/hgi/mo11/tmp_projects/harriet/qc/results/cellsnp"

skip_preprocessing = true
// file__anndata_merged = '/lustre/scratch126/humgen/projects/sc-eqtl-ibd/analysis/harriet_analysis/230313_hb58_yascp_analysis/231114_h5ad_files_for_MCC/231120_TCs_only_regressed_counts_HVGs.h5ad'

harmony{
run_process= true
}
umap{
run_process = true
colors_quantitative{
description = 'Comma separated string of quantitative variables that will be used to color points.'
value = 'n_cells,total_counts,pct_counts_gene_group__mito_transcript,prob_doublet,pct_counts_gene_group__ribo_rna,Azimuth:predicted.celltype.l2.score,Azimuth:mapping.score,log10_ngenes_by_count'
}
colors_categorical{
description = 'Comma separated string of categorical variables that will be used to color points.'
value = 'cell_passes_qc,cell_passes_qc-per:Azimuth:L0_predicted.celltype.l2,experiment_id,Azimuth:predicted.celltype.l2,Celltypist:Immune_All_Low:predicted_labels,Celltypist:Immune_All_High:predicted_labels,donor_id'
}
}

// Metrics used for MAD-based outlier flagging.
mads_categories ='pct_counts_gene_group__mito_transcript,pct_counts_gene_group__mito_protein,pct_counts_gene_group__ribo_protein,pct_counts_gene_group__ribo_rna,total_counts,n_genes_by_counts,log10_ngenes_by_count'
// hard_filters_file = "${projectDir}/../sample_qc.yml"
// hard_filters_drop = false //#This indicates whether we want to drop the cells that fail hard filters of just flag them

cluster{
description = """Parameters for clustering. All pairwise combinations of
method and resolution will be performed."""
number_neighbors{
description = """Number of neighbors. If <= 0, uses number of unique
experiment_id."""
value = 15
}
methods{
description = 'Clustering method. Valid options [leiden|louvain].'
value = 'leiden'
}
resolutions{
description = 'Clustering resolution.'
value = [0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0]
}

variables_boxplot{
// NOTE(review): key 'decription' below is likely a typo for 'description';
// kept as-is in case a consumer reads this exact key.
decription = 'Generate boxplots of these variables for each cluster.'
value ='n_cells,total_counts,pct_counts_gene_group__mito_transcript'
}

known_markers{
run_process = false
description = """Files with markers that will be used to generate
dotplots. Each marker file should be the full path and have the
following columns: cell_type, hgnc_symbol. The following columns
are optional: p_value_adj. Use "" for a single entry in the
file_id and file value to indicate no plots."""
value = [
[ file_id: 'SmillieCS_31348891', file: '/lustre/scratch119/humgen/projects/sc-eqtl-ibd/data/marker_gene_db-raw_data/database/celltypes/colon/SmillieCS-31348891/database.tsv' ],
[ file_id: 'ParikhK_30814735', file: '/lustre/scratch119/humgen/projects/sc-eqtl-ibd/data/marker_gene_db-raw_data/database/celltypes/colon/ParikhK-30814735/database.tsv' ],
[ file_id: 'JamesKR_32066951', file: '/lustre/scratch119/humgen/projects/sc-eqtl-ibd/data/marker_gene_db-raw_data/database/celltypes/colon-immune/JamesKR-32066951/database.tsv' ]
]
}




}
bbknn{
run_process = true
}

// Celltype assignment: only Azimuth and Celltypist are enabled here.
celltype_assignment{
run_celltype_assignment=false
run_azimuth=true
run_keras=false
run_celltypist=true
}
// Empty string = regress out nothing when computing reduced dimensions.
reduced_dims{
vars_to_regress{
value = ''
}
}

}

// Per-process resource overrides for the recluster run. These supplement
// the base 'sanger' profile; memory figures are LSF reservations.
process {

// Plotting needs an isolated container filesystem (no host env leakage).
withName: plot_distributions{
containerOptions = "--containall --cleanenv --workdir /tmp -B /tmp"
}

// Marker calling over the full matrix is memory-heavy; cap parallelism.
withName: cellex_cluster_markers{
maxForks=7
memory = 300.GB
}

withName: GATHER_DATA{
maxForks=7
memory = 100.GB
}
withName: LISI{
maxForks=7
memory = 300.GB
}
withName: cluster_validate_resolution_keras{
memory = 300.GB
}

withName: umap_calculate_and_plot{
memory = 300.GB
}

withName: sccaf_assess_clustering{
memory = 300.GB
}

}
2 changes: 1 addition & 1 deletion assets/deploy_scripts/nohup_start_nextflow_lsf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ parentdir="$(dirname "$CWD1")"
export RUN_ID="${PWD##*/}"
mkdir $PWD/work || echo 'exists'
mkdir $PWD/work/tmp || echo 'exists'
echo $RUN_ID | nextflow run /software/hgi/pipelines/yascp_versions/yascp_v1.2 -profile sanger -c $INPUT_FILE --nf_ci_loc $PWD -resume > nextflow.nohup.log 2>&1 &
echo $RUN_ID | nextflow run /software/hgi/pipelines/yascp_versions/yascp_v1.3__work -profile sanger -c $INPUT_FILE --nf_ci_loc $PWD -resume > nextflow.nohup.log 2>&1 &

# get process PID
sleep 1 && export PID=$(pgrep -f "\\-\\-nf_ci_loc $RUN_DIR")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ export RUN_ID="${PWD##*/}"
# export TEMP=$PWD/tmp
# export TMP_DIR=$PWD/tmp

echo $RUN_ID | nextflow run /software/hgi/pipelines/yascp_versions/yascp_v1.2 -profile sanger -c $INPUT_FILE --nf_ci_loc $PWD -entry WORK_DIR_REMOVAL --remove_work_dir -resume > nextflow.nohup.log 2>&1 &
echo $RUN_ID | nextflow run /software/hgi/pipelines/yascp_versions/yascp_v1.3__work -profile sanger -c $INPUT_FILE --nf_ci_loc $PWD -entry WORK_DIR_REMOVAL --remove_work_dir -resume > nextflow.nohup.log 2>&1 &

# get process PID
sleep 1 && export PID=$(pgrep -f "\\-\\-nf_ci_loc $RUN_DIR")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ parentdir="$(dirname "$CWD1")"
export RUN_ID="${PWD##*/}"
mkdir $PWD/work || echo 'exists'
mkdir $PWD/work/tmp || echo 'exists'
echo $RUN_ID | nextflow run /software/hgi/pipelines/yascp_versions/yascp_v1.2 -profile sanger -entry JUST_CELLTYPES -c $INPUT_FILE --nf_ci_loc $PWD -resume > nextflow.nohup.log 2>&1 &
echo $RUN_ID | nextflow run /software/hgi/pipelines/yascp_versions/yascp_v1.3__work -profile sanger -entry JUST_CELLTYPES -c $INPUT_FILE --nf_ci_loc $PWD -resume > nextflow.nohup.log 2>&1 &

# get process PID
sleep 1 && export PID=$(pgrep -f "\\-\\-nf_ci_loc $RUN_DIR")
Expand Down
27 changes: 27 additions & 0 deletions assets/deploy_scripts/nohup_start_nextflow_lsf_recluster.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#!/usr/bin/env bash
# Launch the yascp pipeline in JUST_RECLUSTER mode as a background nextflow
# run, recording its PID so the run can be killed later.
#
# Usage: nohup_start_nextflow_lsf_recluster.sh <input_file>
#   <input_file>: user nextflow config, applied after the bundled
#   recluster_profile.nf so it can override profile defaults.
INPUT_FILE=$1
dt=`date +"%Y_%m_%d_%T"`
# Keep a timestamped copy of the previous run's log. The '.log2' suffix is
# deliberate: it keeps the backup out of the 'rm -f *.log' cleanup below.
cp nextflow.nohup.log ./nextflow.nohup_$dt.log2 || echo 'first time running'
# activate Nextflow conda env

# clean up previous run files
rm -f *.log
rm -f nextflow.nohup.PID.txt

# start Nextflow in background:
export NXF_OPTS="-Xms5G -Xmx5G"

CWD1="$PWD"
parentdir="$(dirname "$CWD1")"
# export RUN_ID="${parentdir##*/}"
# RUN_ID is the launch directory's name; it is piped to nextflow on stdin.
export RUN_ID="${PWD##*/}"
mkdir $PWD/work || echo 'exists'
mkdir $PWD/work/tmp || echo 'exists'
echo $RUN_ID | nextflow run /software/hgi/pipelines/yascp_versions/yascp_v1.3__work -profile sanger -entry JUST_RECLUSTER -c /software/hgi/pipelines/yascp_versions/yascp_v1.3__work/assets/deploy_scripts/input_setups/recluster_profile.nf -c $INPUT_FILE --nf_ci_loc $PWD -resume > nextflow.nohup.log 2>&1 &

# get process PID
# BUG FIX: the pattern previously used $RUN_DIR, which is never set in this
# script, so the (empty) suffix made pgrep match ANY process containing
# '--nf_ci_loc' and could record the wrong PID. Nextflow above is started
# with '--nf_ci_loc $PWD', so match on $PWD.
sleep 1 && export PID=$(pgrep -f "\\-\\-nf_ci_loc $PWD")
echo $PID > nextflow.nohup.PID.txt
echo "Nextflow PID is $PID (saved in ./nextflow.nohup.PID.txt)"
echo kill with \"kill $PID\"
echo "check logs files nextflow.nohup.log and .nextflow.log"
2 changes: 1 addition & 1 deletion assets/deploy_scripts/nohup_start_nextflow_lsf_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ parentdir="$(dirname "$CWD1")"
export RUN_ID="${PWD##*/}"
mkdir $PWD/work || echo 'exists'
mkdir $PWD/work/tmp || echo 'exists'
echo $RUN_ID | nextflow run /software/hgi/pipelines/yascp_versions/yascp_v1.2 -profile sanger,test --nf_ci_loc $PWD -resume > nextflow.nohup.log 2>&1 &
echo $RUN_ID | nextflow run /software/hgi/pipelines/yascp_versions/yascp_v1.3__work -profile sanger,test --nf_ci_loc $PWD -resume > nextflow.nohup.log 2>&1 &

# get process PID
sleep 1 && export PID=$(pgrep -f "\\-\\-nf_ci_loc $RUN_DIR")
Expand Down
17 changes: 10 additions & 7 deletions bin/0026-plot_filtered_cells.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,13 +67,16 @@ def main():
# Check if any difference between before and after filters. If not,
# return early.
df_after_filters = df[df.filter_type.isin(['after_filters'])]
filt = df_after_filters.n_cells_left_in_adata == df_before_filters.loc[
df_after_filters.experiment_id,
'n_cells_left_in_adata'
].values
if all(filt):
print("No difference detected before and after filters. No plots.")
return()
try:
filt = df_after_filters.n_cells_left_in_adata == df_before_filters.loc[
df_after_filters.experiment_id,
'n_cells_left_in_adata'
].values
if all(filt):
print("No difference detected before and after filters. No plots.")
return()
except:
return()

# Set some plotting parameters
plt_height = 16 # 1.5 * df.experiment_id.nunique()
Expand Down
5 changes: 4 additions & 1 deletion bin/0028-plot_predicted_sex.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,10 @@ def main():

# Load the AnnData file
adata = sc.read_h5ad(filename=options.h5)

try:
adata.X=adata.layers['counts']
except:
_='counts may be already set'
# If we have a flag for cells that pass QC then filter down to them
if 'cell_passes_qc' in adata.obs:
adata = adata[adata.obs['cell_passes_qc'], :]
Expand Down
5 changes: 4 additions & 1 deletion bin/0030-estimate_pca_elbow.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,10 @@ def main():

# Read in the dataframe
adata = sc.read_h5ad(filename=options.h5)

try:
adata.X=adata.layers['counts']
except:
_='counts may be already set'
kneedle_dict = {}
output_dict = {}

Expand Down
Loading

0 comments on commit 9061c80

Please sign in to comment.