# configuration_example_1_of_2.ini

# This is the first of two example configuration files for performing efficient
# data clean up. All commented-out parameters are those that are used by
# default.


[1]
# ##############################################################################
# Checks sample contamination using the bafRegress tool
# (http://genome.sph.umich.edu/wiki/BAFRegress). Field names can be modified
# using options (as described below).
# ##############################################################################

script = contamination
# Placeholder path: replace it with your own before running.
raw-dir = /PATH/TO/DIRECTORY/CONTAINING/INTENSITIES.txt
# colsample = Sample Name
# colmarker = SNP Name
# colbaf = B Allele Freq
# colab1 = Allele1 - AB
# colab2 = Allele2 - AB
# sge
# sge-walltime = WRITE WALLTIME ONLY IF REQUIRED
# sge-nodes = WRITE NB NODES AND NB PROCESSOR PER NODE ONLY IF REQUIRED
# sample-per-run-for-sge = 30


[2]
# ##############################################################################
# Checks missing rate and pairwise concordance of duplicated samples. Duplicated
# samples should have the same family and individual identification numbers.
# The names can be modified directly in the transposed pedfile.
# ##############################################################################

script = duplicated_samples
# sample-completion-threshold = 0.9
# sample-concordance-threshold = 0.97

[3]
# ##############################################################################
# Checks missing rate and pairwise concordance of duplicated markers. Duplicated
# markers are found by looking at their chromosomal position. No modification of
# the transposed bedfile is required.
# ##############################################################################

script = duplicated_snps
# snp-completion-threshold = 0.9
# snp-concordance-threshold = 0.98
# NOTE(review): the option below is spelled with an underscore, whereas every
# other option in this file uses hyphens - confirm against the script's options.
# frequency_difference = 0.05


[4]
# ##############################################################################
# Finds and removes markers that have a missing rate of 100%, or markers (not
# located on the mitochondrial chromosome) that have a heterozygosity rate of
# 0%. No options are listed for this step.
# ##############################################################################

script = noCall_hetero_snps


[5]
# ##############################################################################
# Removes samples with a missing rate higher than a user defined threshold. For
# this step, we recommend using a threshold of 10% missing rate, as samples with
# a missing rate higher than 2% will be removed later (see step 7).
# ##############################################################################

script = sample_missingness
# mind = 0.1


[6]
# ##############################################################################
# Removes markers with a missing rate higher than a user defined threshold. For
# this step, we recommend using a threshold of 2% missing rate (the default
# value, shown commented out below).
# ##############################################################################

script = snp_missingness
# geno = 0.02


[7]
# ##############################################################################
# Removes samples with a missing rate higher than a user defined threshold. For
# this step, we recommend using a threshold of 2% missing rate.
# ##############################################################################

script = sample_missingness
# Set explicitly because it differs from the default (0.1, see step 5).
mind = 0.02


[8]
# ##############################################################################
# Using PLINK, finds samples with gender issues, according to heterozygosity
# rate on the X chromosome. If you want to produce a gender plot, you need to
# uncomment the "gender-plot" option and provide a file containing marker
# intensities on the X and Y chromosomes. If you want to produce a BAF and LRR
# plot, you need to uncomment the "lrr-baf" option and provide a directory
# containing the BAF and LRR values of each marker on the X and Y chromosomes
# (one file per sample).
# ##############################################################################

script = sex_check
# femaleF = 0.3
# maleF = 0.7
# nbChr23 = 50
# Gender plot (optional, see above).
# gender-plot
# sex-chr-intensities = /PATH/TO/FILE/CONTAINING/INTENSITIES_FILE.txt
# gender-plot-format = png
# BAF and LRR plot (optional, see above).
# lrr-baf
# lrr-baf-raw-dir = /PATH/TO/DIRECTORY/CONTAINING/BAF_LRR_FILES.txt
# lrr-baf-format = png
# lrr-baf-dpi = 300


[9]
# ##############################################################################
# Using PLINK, performs a plate bias analysis, using a p-value threshold of
# 1.0e-7.
# ##############################################################################

script = plate_bias
# Placeholder path: replace it with your own before running.
loop-assoc = /PATH/TO/FILE/CONTAINING/PLATE_INFORMATION.txt
# pfilter = 1.0e-07


[10]
# ##############################################################################
# Checks for related individuals and randomly keeps one of each related group.
# If you have a server with a DRMAA-compliant distributed resource management
# system, you can uncomment the "sge" and the "line-per-file-for-sge" options,
# to run this step in parallel.
# ##############################################################################

script = find_related_samples
# min-nb-snp = 10000
# indep-pairwise = 50 5 0.1
# maf = 0.05
# ibs2-ratio = 0.8
# sge
# line-per-file-for-sge = 100
# sge-walltime = WRITE WALLTIME ONLY IF REQUIRED
# sge-nodes = WRITE NB NODES AND NB PROCESSOR PER NODE ONLY IF REQUIRED


[11]
# ##############################################################################
# Using PLINK, computes the MDS value of each sample, and using three reference
# populations (CEU, YRI and JPT-CHB), finds outliers of one of those three
# reference populations. You might want to skip the reference populations using
# the "skip-ref-pops" option. You might need to change the "multiplier" option
# to be more or less stringent, according to your dataset. If you have a server
# with a DRMAA-compliant distributed resource management system, you can
# uncomment the "sge" and the "line-per-file-for-sge" options, to run this step
# in parallel.
# ##############################################################################

script = check_ethnicity
# Placeholder paths: replace them with your own before running.
ceu-bfile = /PATH/TO/PLINK/BINARY/FILE/FOR/CEU_population
yri-bfile = /PATH/TO/PLINK/BINARY/FILE/FOR/YRI_population
jpt-chb-bfile = /PATH/TO/PLINK/BINARY/FILE/FOR/JPT-CHB_population
# skip-ref-pops
# min-nb-snp = 8000
# indep-pairwise = 50 5 0.1
# maf = 0.05
# sge
# line-per-file-for-sge = 100
# nb-components = 10
# outliers-of = CEU
# multiplier = 1.9
# xaxis = C1
# yaxis = C2
# format = png
# title = "C2 in function of C1 - MDS"
# xlabel = C1
# ylabel = C2
# create-scree-plot
# scree-plot-title = "TITLE OF THE PLOT"
# sge-walltime = WRITE WALLTIME ONLY IF REQUIRED
# sge-nodes = WRITE NB NODES AND NB PROCESSOR PER NODE ONLY IF REQUIRED
# ibs-sge-walltime = WRITE WALLTIME ONLY IF REQUIRED
# ibs-sge-nodes = WRITE NB NODES AND NB PROCESSOR PER NODE ONLY IF REQUIRED