-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
November 2023 - April 2024
- Loading branch information
Showing
67 changed files
with
4,823 additions
and
171 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
{ | ||
"full-splice_match": [ | ||
{ | ||
"perc_A_downstream_TTS":[0,59] | ||
} | ||
], | ||
"rest": [ | ||
{ | ||
"perc_A_downstream_TTS":[0,59], | ||
"all_canonical":"canonical", | ||
"RTS_stage":"FALSE" | ||
}, | ||
{ | ||
"perc_A_downstream_TTS":[0,59], | ||
"RTS_stage":"FALSE", | ||
"min_cov":0 | ||
} | ||
] | ||
} | ||
|
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
Sample Sample Number of Iso-Seq Reads Number of ONT Reads Genotype Age (months) | ||
K19 Mouse 1 49638 - WT 4 | ||
K23 Mouse 2 51020 - WT 8 | ||
K21 Mouse 3 53727 - WT 6 | ||
K18 Mouse 4 63328 - TG 2 | ||
K20 Mouse 5 59153 - TG 4 | ||
K17 Mouse 6 50058 - WT 2 | ||
S19 Mouse 7 24293 738056 WT 4 | ||
K24 Mouse 8 25705 763528 TG 8 | ||
L22 Mouse 9 28081 807960 TG 8 | ||
M21 Mouse 10 27351 725260 WT 2 | ||
O18 Mouse 11 27135 824401 TG 2 | ||
O23 Mouse 12 24598 800543 WT 8 | ||
O22 Mouse 13 25283 765738 TG 6 | ||
P19 Mouse 14 22427 730010 WT 6 | ||
T20 Mouse 15 26239 819564 TG 6 | ||
Q20 Mouse 16 28114 1317511 TG 8 | ||
Q21 Mouse 17 23770 1080464 WT 2 | ||
S18 Mouse 18 27546 1131981 TG 2 | ||
S23 Mouse 19 24322 998458 WT 8 | ||
Q18 Mouse 20 32683 1295303 TG 6 | ||
Q17 Mouse 21 13861 666928 WT 6 | ||
L18 Mouse 22 16530 793802 TG 4 | ||
Q23 Mouse 23 18572 1117300 WT 4 | ||
T18 Mouse 24 24938 1150108 TG 4 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
Sample Sample Number of Reads Genotype Age (months) | ||
K23 Mouse 2 351454 WT 8 | ||
K18 Mouse 4 338557 TG 2 | ||
K17 Mouse 6 329636 WT 2 | ||
K24 Mouse 8 350120 TG 8 | ||
L22 Mouse 9 328831 TG 8 | ||
M21 Mouse 10 339563 WT 2 | ||
O18 Mouse 11 212387 TG 2 | ||
O23 Mouse 12 329376 WT 8 | ||
Q20 Mouse 16 342173 TG 8 | ||
Q21 Mouse 17 227113 WT 2 | ||
S18 Mouse 18 358413 TG 2 | ||
S23 Mouse 19 354154 WT 8 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
>primer_5p | ||
AAGCAGTGGTATCAACGCAGAGTACATGGG | ||
>primer_3p | ||
GTACTCTGCGTTGATACCACTGCTT |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
50 changes: 50 additions & 0 deletions
50
A_Global_Transcriptome/1_IsoSeq_Pipeline/1b_J20_run_isoseq3.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
#!/bin/bash
#SBATCH --export=ALL # export all environment variables to the batch job
#SBATCH -D . # set working directory to .
#SBATCH -p mrcq # submit to the parallel queue
#SBATCH --time=20:00:00 # maximum walltime for the job
#SBATCH -A Research_Project-MRC148213 # research project to submit under
#SBATCH --nodes=1 # specify number of nodes
#SBATCH --ntasks-per-node=16 # specify number of processors per node
#SBATCH --mail-type=END # send email at job completion
#SBATCH [email protected] # email address
#SBATCH --array=0-1 # 2 samples
#SBATCH --output=1b_J20_run_isoseq3-%A_%a.o
#SBATCH --error=1b_J20_run_isoseq3-%A_%a.e

# NOTE(review): the mail directive above carries no option name (it looks
# redacted); restore it before submitting this script.

# J20 samples
#
# Slurm array job: each array task picks one sample name and one subreads BAM
# by index and runs run_CCS, run_LIMA, run_REFINE and run_CLUSTER on it.

##-------------------------------------------------------------------------

# Print a message to stderr and abort the job.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# source config file and function script
# NOTE(review): this script uses .../sl693/... whereas 1c_J20_run_isoseq3.sh and
# 2b_map_annotate_isoform.sh use .../lsl693/... - confirm which root is current.
module load Miniconda2/4.3.21
SC_ROOT=/lustre/projects/Research_Project-MRC148213/sl693/scripts/rTg4510/A_Global_Transcriptome

# Fail early with a clear message instead of carrying on with nothing loaded.
[[ -r "$SC_ROOT/1_IsoSeq_Pipeline/rTg4510_isoseq.config" ]] \
  || die "cannot read $SC_ROOT/1_IsoSeq_Pipeline/rTg4510_isoseq.config"
source "$SC_ROOT/1_IsoSeq_Pipeline/rTg4510_isoseq.config"

[[ -r "$SC_ROOT/1_IsoSeq_Pipeline/01_source_functions.sh" ]] \
  || die "cannot read $SC_ROOT/1_IsoSeq_Pipeline/01_source_functions.sh"
source "$SC_ROOT/1_IsoSeq_Pipeline/01_source_functions.sh"

##-------------------------------------------------------------------------

# run as array (defined in config file)
# J20_ALL_SAMPLE_NAMES is not set in this script; it must come from the sourced
# config, and its order must match J20_BAM_FILES below - TODO confirm.
: "${SLURM_ARRAY_TASK_ID:?must be submitted as a Slurm array job}"
: "${WKD_ROOT:?WKD_ROOT is not set (expected from the sourced config)}"

rawDir=/lustre/projects/Research_Project-MRC148213/sl693/rTg4510/1_raw/A_WholeTranscriptome/J20_PacBio
J20_BAM_FILES=(
  "$rawDir/m54082_190302_104610.subreads.bam"
  "$rawDir/m54082_180816_074627.subreads.bam"
)

SAMPLE=${J20_ALL_SAMPLE_NAMES[${SLURM_ARRAY_TASK_ID}]}
BAM_FILE=${J20_BAM_FILES[${SLURM_ARRAY_TASK_ID}]}

# An out-of-range task index yields empty strings; stop before the pipeline runs.
[[ -n "$SAMPLE" ]] || die "no sample name for array task ${SLURM_ARRAY_TASK_ID}"
[[ -n "$BAM_FILE" ]] || die "no BAM file for array task ${SLURM_ARRAY_TASK_ID}"

##-------------------------------------------------------------------------

# Isoseq3.4.0
# run_CCS <input_bam> <prefix_output_name> <Output_directory>
# run_LIMA $Sample $Input_CCS_directory $Output_directory <"no_multiplex"/"multiplex">
# run_REFINE $Sample $Input_LIMA_directory $Output_directory
# run_CLUSTER $Sample $Input_REFINE_directory $Output_directory
run_CCS "${BAM_FILE}" "${SAMPLE}" "${WKD_ROOT}/1_isoseq3/1_ccs"
run_LIMA "${SAMPLE}" "${WKD_ROOT}/1_isoseq3/1_ccs" "${WKD_ROOT}/1_isoseq3/2_lima" "no_multiplex"
run_REFINE "${SAMPLE}" "${WKD_ROOT}/1_isoseq3/2_lima" "${WKD_ROOT}/1_isoseq3/3_refine"
run_CLUSTER "${SAMPLE}" "${WKD_ROOT}/1_isoseq3/3_refine" "${WKD_ROOT}/1_isoseq3/4_cluster"

##-------------------------------------------------------------------------
#run_star ${SAMPLE} ${RNASEQ_FILTERED_DIR} ${RNASEQ_MAPPED_DIR}
52 changes: 52 additions & 0 deletions
52
A_Global_Transcriptome/1_IsoSeq_Pipeline/1c_J20_run_isoseq3.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
#!/bin/bash
#SBATCH --export=ALL # export all environment variables to the batch job
#SBATCH -D . # set working directory to .
#SBATCH -p mrcq # submit to the parallel queue
#SBATCH --time=20:00:00 # maximum walltime for the job
#SBATCH -A Research_Project-MRC148213 # research project to submit under
#SBATCH --nodes=1 # specify number of nodes
#SBATCH --ntasks-per-node=16 # specify number of processors per node
#SBATCH --mail-type=END # send email at job completion
#SBATCH [email protected] # email address
#SBATCH --array=0-1 # 2 samples
#SBATCH --output=1c_J20_run_isoseq3-%A_%a.o
#SBATCH --error=1c_J20_run_isoseq3-%A_%a.e

# NOTE(review): the mail directive above carries no option name (it looks
# redacted); restore it before submitting this script.

# J20 samples: E18 and B21
#
# Slurm array job: task 0 processes E18 and task 1 processes B21, each through
# run_CCS, run_LIMA, run_REFINE and run_CLUSTER.

##-------------------------------------------------------------------------

# Print a message to stderr and abort the job.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# source config file and function script
module load Miniconda2/4.3.21
SC_ROOT=/lustre/projects/Research_Project-MRC148213/lsl693/scripts/rTg4510/A_Global_Transcriptome

# Fail early with a clear message instead of carrying on with nothing loaded.
[[ -r "$SC_ROOT/1_IsoSeq_Pipeline/rTg4510_isoseq.config" ]] \
  || die "cannot read $SC_ROOT/1_IsoSeq_Pipeline/rTg4510_isoseq.config"
source "$SC_ROOT/1_IsoSeq_Pipeline/rTg4510_isoseq.config"

[[ -r "$SC_ROOT/1_IsoSeq_Pipeline/01_source_functions.sh" ]] \
  || die "cannot read $SC_ROOT/1_IsoSeq_Pipeline/01_source_functions.sh"
source "$SC_ROOT/1_IsoSeq_Pipeline/01_source_functions.sh"

##-------------------------------------------------------------------------

# run as array
# Sample names and BAM files are paired by position; both arrays are set after
# sourcing the config, so these values override any same-named config arrays.
: "${SLURM_ARRAY_TASK_ID:?must be submitted as a Slurm array job}"
: "${WKD_ROOT:?WKD_ROOT is not set (expected from the sourced config)}"

rawDir=/lustre/projects/Research_Project-MRC148213/lsl693/rTg4510/1_raw/A_WholeTranscriptome/J20_PacBio
J20_ALL_SAMPLE_NAMES=(E18 B21)
J20_BAM_FILES=(
  "$rawDir/m54082_180818_105629.subreads.bam"
  "$rawDir/m54082_190303_070925.subreads.bam"
)

SAMPLE=${J20_ALL_SAMPLE_NAMES[${SLURM_ARRAY_TASK_ID}]}
BAM_FILE=${J20_BAM_FILES[${SLURM_ARRAY_TASK_ID}]}

# An out-of-range task index yields empty strings; stop before the pipeline runs.
[[ -n "$SAMPLE" ]] || die "no sample name for array task ${SLURM_ARRAY_TASK_ID}"
[[ -n "$BAM_FILE" ]] || die "no BAM file for array task ${SLURM_ARRAY_TASK_ID}"

##-------------------------------------------------------------------------

# Isoseq3.4.0
# run_CCS <input_bam> <prefix_output_name> <Output_directory>
# run_LIMA $Sample $Input_CCS_directory $Output_directory <"no_multiplex"/"multiplex">
# run_REFINE $Sample $Input_LIMA_directory $Output_directory
# run_CLUSTER $Sample $Input_REFINE_directory $Output_directory
run_CCS "${BAM_FILE}" "${SAMPLE}" "${WKD_ROOT}/1_isoseq3/1_ccs"
run_LIMA "${SAMPLE}" "${WKD_ROOT}/1_isoseq3/1_ccs" "${WKD_ROOT}/1_isoseq3/2_lima" "no_multiplex"
run_REFINE "${SAMPLE}" "${WKD_ROOT}/1_isoseq3/2_lima" "${WKD_ROOT}/1_isoseq3/3_refine"
run_CLUSTER "${SAMPLE}" "${WKD_ROOT}/1_isoseq3/3_refine" "${WKD_ROOT}/1_isoseq3/4_cluster"

##-------------------------------------------------------------------------
#run_star ${SAMPLE} ${RNASEQ_FILTERED_DIR} ${RNASEQ_MAPPED_DIR}
56 changes: 56 additions & 0 deletions
56
A_Global_Transcriptome/1_IsoSeq_Pipeline/2b_map_annotate_isoform.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
#!/bin/bash
#SBATCH --export=ALL # export all environment variables to the batch job
#SBATCH -D . # set working directory to .
#SBATCH -p mrcq # submit to the parallel queue
#SBATCH --time=20:00:00 # maximum walltime for the job
#SBATCH -A Research_Project-MRC148213 # research project to submit under
#SBATCH --nodes=1 # specify number of nodes
#SBATCH --ntasks-per-node=16 # specify number of processors per node
#SBATCH --mail-type=END # send email at job completion
#SBATCH [email protected] # email address
#SBATCH --output=2b_map_annotate_isoform.o2
#SBATCH --error=2b_map_annotate_isoform.e2

# NOTE(review): the mail directive above carries no option name (it looks
# redacted); restore it before submitting this script.

# J20 C20, C21, B21 and E18 Iso-Seq pipeline
#
# As committed, only the final SQANTI3 QC step is active; the merge, alignment,
# collapse and demultiplex steps are kept below as commented-out commands.

##-------------------------------------------------------------------------

# Print a message to stderr and abort the job.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# source config file and function script
module load Miniconda2/4.3.21
SC_ROOT=/lustre/projects/Research_Project-MRC148213/lsl693/scripts/rTg4510/A_Global_Transcriptome
LOGEN_ROOT=/lustre/projects/Research_Project-MRC148213/lsl693/scripts/LOGen

# Fail early with a clear message instead of carrying on with nothing loaded.
[[ -r "$SC_ROOT/1_IsoSeq_Pipeline/rTg4510_isoseq.config" ]] \
  || die "cannot read $SC_ROOT/1_IsoSeq_Pipeline/rTg4510_isoseq.config"
source "$SC_ROOT/1_IsoSeq_Pipeline/rTg4510_isoseq.config"

[[ -r "$SC_ROOT/1_IsoSeq_Pipeline/01_source_functions.sh" ]] \
  || die "cannot read $SC_ROOT/1_IsoSeq_Pipeline/01_source_functions.sh"
source "$SC_ROOT/1_IsoSeq_Pipeline/01_source_functions.sh"

export PATH="$PATH:${LOGEN_ROOT}/miscellaneous"
export PATH="$PATH:${LOGEN_ROOT}/assist_isoseq_processing"
export PATH="$PATH:${LOGEN_ROOT}/assist_ont_processing"

J20_ALL_SAMPLE_NAMES=(B21 C20 C21 E18)

##-------------------------------------------------------------------------

# merging_at_refine <input_flnc_bam_dir> <output_directory> <output_name> <samples.....>
#merging_at_refine $WKD_ROOT/1_isoseq3/3_refine $WKD_ROOT/1_isoseq3/5_merged_cluster ${J20NAME} ${J20_ALL_SAMPLE_NAMES[@]}
#refine2fasta $WKD_ROOT/1_isoseq3/3_refine ${J20_ALL_SAMPLE_NAMES[@]}

# align individual samples
# run_pbmm2align <output_name> <clustered_dir> <mapped_dir>
#for i in ${J20_ALL_SAMPLE_NAMES[@]}; do run_pbmm2align $i $WKD_ROOT/1_isoseq3/4_cluster $WKD_ROOT/2_post_isoseq3/6_minimap; done

# filter_alignment <name> <mapped_dir>
#for i in ${J20_ALL_SAMPLE_NAMES[@]}; do filter_alignment $i $WKD_ROOT/2_post_isoseq3/6_minimap; done

# run_map_cupcakecollapse <sample_prefix_input/output_name> <isoseq3_input_directory> <mapping_output_directory> <tofu_output_directory>
#run_map_cupcakecollapse ${J20NAME} $WKD_ROOT/1_isoseq3/5_merged_cluster $WKD_ROOT/2_post_isoseq3/6_minimap $WKD_ROOT/2_post_isoseq3/7_tofu

# demux <name> <refine_dir> <cluster_report> <tofu_dir>
#demux ${J20NAME} $WKD_ROOT/1_isoseq3/3_refine $WKD_ROOT/1_isoseq3/5_merged_cluster/${J20NAME}.clustered.cluster_report.csv $WKD_ROOT/2_post_isoseq3/7_tofu

##-------------------------------------------------------------------------

# None of these are assigned in this script; they are expected to come from the
# sourced config. Stop with a clear message rather than run SQANTI3 on
# half-built paths such as ".collapsed.gff".
for required_var in WKD_ROOT J20NAME SQANTI3_DIR GENOME_GTF GENOME_FASTA CAGE_PEAK POLYA; do
  [[ -n "${!required_var:-}" ]] || die "${required_var} is not set (expected from the sourced config)"
done

source activate sqanti2_py3

# The log is written relative to the current directory, so a failed cd would
# silently drop SQANTI3 output into the submit directory.
cd "$WKD_ROOT/2_post_isoseq3/9_sqanti3" \
  || die "cannot cd to $WKD_ROOT/2_post_isoseq3/9_sqanti3"

# NOTE(review): -t 30 is higher than the 16 tasks requested via
# --ntasks-per-node above; confirm the intended thread count.
python "${SQANTI3_DIR}/sqanti3_qc.py" -t 30 \
  "$WKD_ROOT/2_post_isoseq3/7_tofu/${J20NAME}.collapsed.gff" \
  "${GENOME_GTF}" "${GENOME_FASTA}" \
  --CAGE_peak "${CAGE_PEAK}" --polyA_motif_list "${POLYA}" \
  --genename --isoAnnotLite --report skip \
  &> "${J20NAME}.collapsed.sqanti.qc.log"
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.