-
Notifications
You must be signed in to change notification settings - Fork 0
/
Config.yaml
66 lines (53 loc) · 4.05 KB
/
Config.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
###<Please change every value after the ":" in section A to your own.>
############################################################################################################################
###A. Basic/required parameter settings
############################################################################################################################
###1. The location to download the PATRIC dataset to (~246G). Please change this to a path on your own system.
###dataset_location: /vol/projects/BIFO/patric_genome
dataset_location: /net/projects/BIFO/patric_genome
###2. The path to store the direct results of each software and the further visualization (either an absolute or a relative path).
output_path: ./
###3. The path to store the temporary files (either an absolute or a relative path).
### Note: some software will generate a large amount of temporary files (up to 60G) in the process
### if you run all species in parallel.
log_path: ./
###4. Maximum number of CPUs.
n_jobs: 20
###5. GPU option for the Aytan-Aktug SSSA model. If set to False, parallelization on CPU will be applied;
### otherwise, it will be applied on one GPU core sequentially.
### NOTE(review): keep the capitalized True/False spelling unless the code reading this file is confirmed to accept other forms.
gpu_on: True
###6. Cleaning of large intermediate files of the specified software. Can be either empty, phenotypeseeker, AytanAktug, or seq2geno.
### The value is intentionally left empty here (no software specified).
clean_software:
###<Change the conda env names below if environments with these names already exist on your machine.>
############################################################################################################################
###B. Env: optional parameter settings
############################################################################################################################
amr_env_name: amr_env
amr_env_name2: amr2
PhenotypeSeeker_env_name: PhenotypeSeeker_env
multi_env_name: multi_env
### The environment below includes PyTorch, which should be installed manually according to https://pytorch.org/get-started/locally/.
multi_torch_env_name: multi_torch_env
kover_env_name: kover_env
phylo_name: phylo_env
phylo_name2: phylo_env2
se2ge_env_name: snakemake_env2
kmer_env_name: kmer_kmc
resfinder_env: res_env
############################################################################################################################
###C. Advanced/optional parameter settings
############################################################################################################################
### NOTE(review): each species list below is a single comma-separated value, not a YAML sequence;
### keep this format unless the code reading this file is confirmed to accept sequences.
###1. 11 species for single-species models.
species_list: Escherichia_coli, Staphylococcus_aureus, Salmonella_enterica, Klebsiella_pneumoniae, Pseudomonas_aeruginosa, Acinetobacter_baumannii, Streptococcus_pneumoniae, Mycobacterium_tuberculosis, Campylobacter_jejuni, Enterococcus_faecium, Neisseria_gonorrhoeae
### 10 species: species_list without Mycobacterium_tuberculosis.
species_list_phylotree: Escherichia_coli, Staphylococcus_aureus, Salmonella_enterica, Klebsiella_pneumoniae, Pseudomonas_aeruginosa, Acinetobacter_baumannii, Streptococcus_pneumoniae, Campylobacter_jejuni, Enterococcus_faecium, Neisseria_gonorrhoeae
### 9 species: species_list without Campylobacter_jejuni and Enterococcus_faecium.
species_list_multi_antibiotics: Mycobacterium_tuberculosis, Escherichia_coli, Staphylococcus_aureus, Salmonella_enterica, Klebsiella_pneumoniae, Pseudomonas_aeruginosa, Acinetobacter_baumannii, Streptococcus_pneumoniae, Neisseria_gonorrhoeae
###2. <You don't need to change the parameters below if you aim to reproduce the complete results of our benchmarking study.>
### 9 species for Aytan-Aktug SSMA and 3 MSMA models, and PhenotypeSeeker, Kover multi-species models.
### To define your own species combinations for MSMA, please replace -f_all with -s "${species[@]}" in ./scripts/model/AytanAktug_MSMA_concat.sh and ./scripts/model/AytanAktug_MSMA_discrete.sh
species_list_multi_species: Mycobacterium_tuberculosis, Salmonella_enterica, Streptococcus_pneumoniae, Escherichia_coli, Staphylococcus_aureus, Klebsiella_pneumoniae, Acinetobacter_baumannii, Pseudomonas_aeruginosa, Campylobacter_jejuni
### NOTE(review): the value below looks like the species initials of species_list_multi_species in the same order;
### presumably the two should be kept in sync - confirm against the scripts that use merge_name.
merge_name: Mt_Se_Sp_Ec_Sa_Kp_Ab_Pa_Cj
### CV settings for nested CV.
cv_number: 10
### 5 CV folds + 1 hold-out set. Only for the Aytan-Aktug multi-species model.
cv_number_multiS: 6
### Sample selection criteria: loose/strict. By default, we only benchmarked on the loosely selected dataset.
QC_criteria: loose