diff --git a/README.md b/README.md index 738ac75..af34597 100644 --- a/README.md +++ b/README.md @@ -18,51 +18,14 @@ There are two branches: * `Dockerfile` - a dockerfile to reproduce the environment used to run the pipeline. ## Pipeline Arguments: -* `--SAMPLEFILE` - The path to the sample file provided to the pipeline. This is a tab-separated file with one sample per line. Each line should contain a sample id, path to bam file, path to barcodes file (in that order!)." +* `--SAMPLEFILE` - The path to the sample file provided to the pipeline. This is a tab-separated file with one sample per line. Each line should contain a sample id, path to bam file, path to barcodes file (in that order!). * `--outdir` - The path to where the results will be saved. - -h5_on_irods": { - "type": "string", - "default": "yes", - "description": "Tells pipeline whether to look for the h5 file on IRODS or the FARM (default yes means look on IRODS)." - }, - "input_matrix": { - "type": "string", - "default": "no", - "description": "Tells pipeline whether input is a h5 file or 10x matrix format (default no means h5 file expected)." - }, - "gene_tag": { - "type": "string", - "default": "Gene", - "description": "If matrix format is used, tells pipeline whether to use exon only (Gene) or exon+intron (Genefull) matrix from STARsolo output." - }, - "qc_mode": { - "type": "integer", - "default": 1, - "description": "Tells pipeline which level of QC to compelte, 1 is the quickets but least depth, 3 is the slowest but most depth." - }, - "cells": { - "type": "integer", - "default": 5000, - "description": "The number of cells expected a priori from the experimental design (only needed in h5 mode)." - }, - "droplets": { - "type": "integer", - "default": 15000, - "description": "Number of total droplets (select a number that goes a few thousand barcodes into the “empty droplet plateau”). Include some droplets that you think are surely empty. But be aware that the larger this number, the longer the algorithm takes to run (linear). Only needed in h5 mode" - }, - "epochs": { - "type": "integer", - "default": 150, - "description": "Number of epochs to train, going above 300 will lead to overfitting." - }, - "fpr": { - "type": "number", - "default": 0.01, - "description": "Target false positive rate in (0, 1). A false positive is a true signal count that is erroneously removed. More background removal is accompanied by more signal removal at high values of FPR." - }, - "learn": { - "type": "number", - "default": 0.0001, - "description": "Training detail: lower learning rate for inference. A OneCycle learning rate schedule is used, where the upper learning rate is ten times this value. (For this value, probably do not exceed 1e-3)." - } +* `--h5_on_irods` - Tells pipeline whether to look for the h5 file on IRODS or the FARM (default yes means look on IRODS). +* `--input_matrix` - Tells pipeline whether input is a h5 file or 10x matrix format (default no means h5 file expected). +* `--gene_tag` - If matrix format is used, tells pipeline whether to use exon only (Gene) or exon+intron (Genefull) matrix from STARsolo output. This is our nomenclature for STARsolo output, you can change this to be any directory name and it will look in that directory for the `filtered` directory which contains `barcodes.tsv.gz`, `features.tsv.gz` and `matrix.mtx.gz`. +* `--qc_mode` - Tells pipeline which level of QC to compelte, 1 is the quickets but least depth, 3 is the slowest but most depth. +* `--cells` - The number of cells expected a priori from the experimental design (only needed in h5 mode). +* `--droplets` - Number of total droplets (select a number that goes a few thousand barcodes into the “empty droplet plateau”). Include some droplets that you think are surely empty. But be aware that the larger this number, the longer the algorithm takes to run (linear). Only needed in h5 mode. +* `--epochs` - Number of epochs to train, going above 300 will lead to overfitting. +* `--fpr` - Target false positive rate in (0, 1). A false positive is a true signal count that is erroneously removed. More background removal is accompanied by more signal removal at high values of FPR. +* `--learn` - Training detail: lower learning rate for inference. A OneCycle learning rate schedule is used, where the upper learning rate is ten times this value. (For this value, probably do not exceed 1e-3).