From 9291a14f889a043f3890a3954493751e39f2a02c Mon Sep 17 00:00:00 2001 From: Edmund Miller Date: Thu, 7 Dec 2023 13:13:20 -0600 Subject: [PATCH] style(#6): Run nix fmt --- README.md | 52 +- .../dragen-replay.json | 3518 ++++++++--------- .../tiny-2x1Xrepeats.v8/dragen-replay.json | 3518 ++++++++--------- doc/developer.md | 41 +- doc/readme.md | 65 +- flake.nix | 3 - src/compare.cpp | 9 +- src/include/align/Aligner.hpp | 4 +- src/include/align/Alignment.hpp | 9 +- src/include/align/AlignmentRescue.hpp | 12 +- src/include/align/CalculateRefStartEnd.hpp | 1 + src/include/align/Cigar.hpp | 5 +- src/include/align/InsertSizeDistribution.hpp | 1 - src/include/align/InsertSizeParameters.hpp | 1 + src/include/align/PairBuilder.hpp | 4 +- src/include/align/Query.hpp | 4 +- src/include/align/SinglePicker.hpp | 4 +- src/include/align/VectorSmithWaterman.hpp | 12 +- src/include/bam/Bam.hpp | 1 + src/include/bam/BamBlockReader.hpp | 3 +- src/include/common/Debug.hpp | 3 +- src/include/common/Threads.hpp | 2 +- src/include/fastq/FastqBlockReader.hpp | 3 +- src/include/fastq/FastqNRecordReader.hpp | 3 +- src/include/fastq/Token.hpp | 6 +- src/include/map/Mapper.hpp | 3 +- src/include/map/SeedPosition.hpp | 2 +- src/include/reference/Bucket.hpp | 1 + src/include/reference/ReferenceSequence.hpp | 43 +- src/include/sequences/CrcHasher.hpp | 4 +- src/lib/align/Aligner.cpp | 13 +- src/lib/align/AlignmentGenerator.cpp | 1 + src/lib/align/AlignmentRescue.cpp | 4 +- src/lib/align/Cigar.cpp | 513 +-- src/lib/align/PairBuilder.cpp | 7 +- src/lib/align/SmithWaterman.cpp | 3 +- src/lib/align/VectorSmithWaterman.cpp | 2 + src/lib/align/Wavefront.cpp | 1 + src/lib/align/tests/unit/CigarGtest.cpp | 6 +- src/lib/align/tests/unit/WavefrontGtest.cpp | 65 +- src/lib/bam/Tokenizer.cpp | 1 + src/lib/common/Debug.cpp | 1 + src/lib/common/Program.cpp | 5 +- src/lib/common/SystemCompatibility.cpp | 8 +- src/lib/fastq/Tokenizer.cpp | 1 + src/lib/fastq/tests/unit/TokenizerGtest.cpp | 3 +- .../Fastq2ReadTransformerGtest.cpp | 3 +- src/lib/map/ChainBuilder.cpp | 3 +- src/lib/map/Mapper.cpp | 35 +- .../tests/integration/ChainBuilderGtest.cpp | 1 - .../map/tests/integration/SeedChainGtest.cpp | 3 +- .../tests/unit/BestIntervalTrackerGtest.cpp | 3 +- src/lib/map/tests/unit/SeedPositionGtest.cpp | 4 +- src/lib/options/DragenOsOptions.cpp | 14 +- src/lib/reference/ExtendTableInterval.cpp | 3 +- src/lib/reference/HashtableConfig.cpp | 1 + src/lib/reference/ReferenceDir.cpp | 12 +- .../integration/ExtendTableIntervalGtest.cpp | 5 +- .../tests/integration/HashtableGtest.cpp | 1 - .../reference/tests/unit/HashRecordGtest.cpp | 1 - .../tests/unit/HashtableConfigGtest.cpp | 1 - .../tests/unit/ReferenceSequenceGtest.cpp | 3 +- src/lib/sequences/CrcHasher.cpp | 5 +- src/lib/sequences/Read.cpp | 3 +- .../sequences/tests/unit/CrcHasherGtest.cpp | 4 +- .../tests/unit/CrcPolynomialGtest.cpp | 1 - src/lib/sequences/tests/unit/SeedGtest.cpp | 9 +- src/lib/workflow/DualFastq2SamWorkflow.cpp | 28 +- src/lib/workflow/GenHashTableWorkflow.cpp | 3 +- src/lib/workflow/Input2SamWorkflow.cpp | 15 +- tests/ExtendTableGtest.cpp | 472 +-- tests/HashtableGtest.cpp | 1300 +++--- tests/generate.cpp | 63 +- 73 files changed, 5037 insertions(+), 4930 deletions(-) diff --git a/README.md b/README.md index d7b9f63..252e8cc 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Dragmap +# Dragmap Dragmap is the Dragen mapper/aligner Open Source Software. @@ -9,7 +9,7 @@ Dragmap is the Dragen mapper/aligner Open Source Software. Dragmap is available in the [Bioconda](https://anaconda.org/bioconda/dragmap) package manager.If you choose to install with bioconda, please first follow the bioconda install procedure from here: https://bioconda.github.io/user/install.html -Once you have bioconda installed, you should be able to install Dragmap with `conda install dragmap` (or `conda create -n myenv dragmap` to install in a new environment). This will install the `dragen-os` binary. +Once you have bioconda installed, you should be able to install Dragmap with `conda install dragmap` (or `conda create -n myenv dragmap` to install in a new environment). This will install the `dragen-os` binary. ### Build from source @@ -17,64 +17,49 @@ Once you have bioconda installed, you should be able to install Dragmap with `c Compilation was tested on CentOS 7 -* C++17 compatible compiler (e.g gcc-c++ >= 7.1) -* GNU make >= 3.82 -* Boost library : boost169-devel >= 1.69.0-1.el7 -* For unit tests : googletest (>= v1.6) -* Hardware: x86_86, 64GB RAM minimum -* OS: Centos >= 7.7 +- C++17 compatible compiler (e.g gcc-c++ >= 7.1) +- GNU make >= 3.82 +- Boost library : boost169-devel >= 1.69.0-1.el7 +- For unit tests : googletest (>= v1.6) +- Hardware: x86_86, 64GB RAM minimum +- OS: Centos >= 7.7 #### Install - The basic procedure is make Binary will be generated in ./build/release/ - Then optionally, to install to /usr/bin/ make install - - By default make will compile and launch unit tests. To disable unit tests, use HAS_GTEST=0, e.g. : - HAS_GTEST=0 make - -To compile with unit tests, if google test was installed in user space, it might be required to set GTEST_ROOT and LD_LIBRARY_PATH to where gtest was installed, e.g. : +To compile with unit tests, if google test was installed in user space, it might be required to set GTEST_ROOT and LD_LIBRARY_PATH to where gtest was installed, e.g. : export GTEST_ROOT=/home/username/lib/gtest export LD_LIBRARY_PATH=/home/username/lib/gtest/lib - - - #### Other variables controlling the build process: +- GCC_BASE +- CXX +- BOOST_ROOT +- BOOST_INCLUDEDIR +- BOOST_LIBRARYDIR -* GCC_BASE -* CXX -* BOOST_ROOT -* BOOST_INCLUDEDIR -* BOOST_LIBRARYDIR - - - +## Basic command line usage - -## Basic command line usage - -### Command line options +### Command line options dragen-os --help - -### Build hash table of a reference fasta file +### Build hash table of a reference fasta file dragen-os --build-hash-table true --ht-reference reference.fasta --output-directory /home/data/reference/ @@ -84,7 +69,7 @@ To compile with unit tests, if google test was installed in user space, it might ### Align paired-end reads : -Output result to standard output +Output result to standard output dragen-os -r /home/data/reference/ -1 reads_1.fastq.gz -2 reads_2.fastq.gz > result.sam @@ -96,7 +81,6 @@ Or directly to a file : dragen-os -r /home/data/reference/ -1 reads_1.fastq.gz > result.sam - ## Pull requests We are not accepting pull requests into this repository at this time, in particular because of conflicts concerns with our internal repository. For any bug report / recommendation / feature request, please open an issue. diff --git a/data/tiny/tiny-2x1Xrepeats-1Xinv.v8/dragen-replay.json b/data/tiny/tiny-2x1Xrepeats-1Xinv.v8/dragen-replay.json index 4306611..07035db 100644 --- a/data/tiny/tiny-2x1Xrepeats-1Xinv.v8/dragen-replay.json +++ b/data/tiny/tiny-2x1Xrepeats-1Xinv.v8/dragen-replay.json @@ -1,1762 +1,1762 @@ { - "command_line": "\/staging\/rpetrovski\/dragen\/objtree_el7-master\/bin\/dragen --build-hash-table true --ht-reference tiny.fasta --output-directory tiny.v8", - "dragen_config": [ - { - "name": "Aligner.align-direction", - "value": "4" - }, - { - "name": "Aligner.aln-en-mask", - "value": "15" - }, - { - "name": "Aligner.aln-enable", - "value": "1" - }, - { - "name": "Aligner.backtrace-delay", - "value": "8" - }, - { - "name": "Aligner.dedup-min-qual", - "value": "15" - }, - { - "name": "Aligner.disable-lfsr", - "value": "0" - }, - { - "name": "Aligner.en-alt-hap-aln", - "value": "1" - }, - { - "name": "Aligner.en-chimeric-aln", - "value": "1" - }, - { - "name": "Aligner.exon-jump-en", - "value": "1" - }, - { - "name": "Aligner.filt-clip1-pair", - "value": "10" - }, - { - "name": "Aligner.filt-clip1-unpr", - "value": "-5" - }, - { - "name": "Aligner.filt-clip2-pair", - "value": "5" - }, - { - "name": "Aligner.filt-clip2-unpr", - "value": "-10" - }, - { - "name": "Aligner.filt-min-qual", - "value": "0" - }, - { - "name": "Aligner.fix-overlap-mapq", - "value": "0" - }, - { - "name": "Aligner.gap-ext-pen", - "value": "1" - }, - { - "name": "Aligner.gap-open-pen", - "value": "6" - }, - { - "name": "Aligner.global", - "value": "0" - }, - { - "name": "Aligner.hard-clips", - "value": "6" - }, - { - "name": "Aligner.hw-mapq-max", - "value": "250" - }, - { - "name": "Aligner.leftalign-mode", - "value": "2" - }, - { - "name": "Aligner.mapq-coeff", - "value": "152" - }, - { - "name": "Aligner.mapq-floor-1snp", - "value": "0" - }, - { - "name": "Aligner.mapq-max", - "value": "60" - }, - { - "name": "Aligner.mapq-min-len", - "value": "50" - }, - { - "name": "Aligner.mapq-strict-sjs", - "value": "0" - }, - { - "name": "Aligner.match-score", - "value": "1" - }, - { - "name": "Aligner.max-rescues", - "value": "1023" - }, - { - "name": "Aligner.max-stitch-olap", - "value": "48" - }, - { - "name": "Aligner.min-overhang", - "value": "6" - }, - { - "name": "Aligner.min-overhang-ann", - "value": "6" - }, - { - "name": "Aligner.min-score-coeff", - "value": "0.0" - }, - { - "name": "Aligner.mismatch-pen", - "value": "4" - }, - { - "name": "Aligner.no-align-score", - "value": "-8388608" - }, - { - "name": "Aligner.no-ambig-strand", - "value": "1" - }, - { - "name": "Aligner.no-noncan-motifs", - "value": "1" - }, - { - "name": "Aligner.no-unclip-score", - "value": "1" - }, - { - "name": "Aligner.no-unpaired", - "value": "0" - }, - { - "name": "Aligner.paired-mate-info", - "value": "1" - }, - { - "name": "Aligner.pe-max-penalty", - "value": "255" - }, - { - "name": "Aligner.pe-orientation", - "value": "0" - }, - { - "name": "Aligner.pe-sample-max-insert", - "value": "65535" - }, - { - "name": "Aligner.resc-ifpair-len", - "value": "48" - }, - { - "name": "Aligner.resc-nopair-len", - "value": "0" - }, - { - "name": "Aligner.rescue-ceil-factor", - "value": "3" - }, - { - "name": "Aligner.rescue-hifreq", - "value": "0" - }, - { - "name": "Aligner.rescue-kmer-len", - "value": "32" - }, - { - "name": "Aligner.rescue-max-snps", - "value": "7" - }, - { - "name": "Aligner.rna-max-insert", - "value": "4000000" - }, - { - "name": "Aligner.rna-pair-pen-rat", - "value": "5.71875" - }, - { - "name": "Aligner.sample-mapq0", - "value": "1" - }, - { - "name": "Aligner.sec-aligns", - "value": "0" - }, - { - "name": "Aligner.sec-aligns-hard", - "value": "0" - }, - { - "name": "Aligner.sec-phred-delta", - "value": "0" - }, - { - "name": "Aligner.sec-score-delta", - "value": "0" - }, - { - "name": "Aligner.supp-aligns", - "value": "3" - }, - { - "name": "Aligner.supp-as-sec", - "value": "0" - }, - { - "name": "Aligner.sw-all", - "value": "0" - }, - { - "name": "Aligner.sw-burst-diffs", - "value": "1" - }, - { - "name": "Aligner.sw-early-max", - "value": "256" - }, - { - "name": "Aligner.sw-extra-intvl", - "value": "1" - }, - { - "name": "Aligner.unclip-score", - "value": "5" - }, - { - "name": "Aligner.unpaired-pen", - "value": "80" - }, - { - "name": "Aligner.xs-pair-penalty", - "value": "25" - }, - { - "name": "Mapper.adapter-times", - "value": "0" - }, - { - "name": "Mapper.ann-sj-max-indel", - "value": "10" - }, - { - "name": "Mapper.chain-diam-lim", - "value": "8" - }, - { - "name": "Mapper.chain-rad-lim", - "value": "5" - }, - { - "name": "Mapper.cut-bases", - "value": "0" - }, - { - "name": "Mapper.dark-base", - "value": "2" - }, - { - "name": "Mapper.edit-chain-limit", - "value": "29" - }, - { - "name": "Mapper.edit-mode", - "value": "0" - }, - { - "name": "Mapper.edit-read-len", - "value": "100" - }, - { - "name": "Mapper.edit-seed-num", - "value": "6" - }, - { - "name": "Mapper.filt-good-qual", - "value": "0" - }, - { - "name": "Mapper.filter-len-ratio", - "value": "4" - }, - { - "name": "Mapper.intvl-max-hits", - "value": "16" - }, - { - "name": "Mapper.intvl-min-chains", - "value": "8" - }, - { - "name": "Mapper.intvl-sample-hits", - "value": "16" - }, - { - "name": "Mapper.intvl-seed-length", - "value": "60" - }, - { - "name": "Mapper.intvl-seed-longer", - "value": "8" - }, - { - "name": "Mapper.intvl-target-hits", - "value": "32" - }, - { - "name": "Mapper.map-orientations", - "value": "0" - }, - { - "name": "Mapper.max-dram-reqs", - "value": "0" - }, - { - "name": "Mapper.max-hifreq-hits", - "value": "16" - }, - { - "name": "Mapper.max-intron-bases", - "value": "200000" - }, - { - "name": "Mapper.max-lowq-bases", - "value": "4294967295" - }, - { - "name": "Mapper.max-lowq-ratio", - "value": "4294967295" - }, - { - "name": "Mapper.max-read-len", - "value": "4294967295" - }, - { - "name": "Mapper.max-seed-chains", - "value": "511" - }, - { - "name": "Mapper.max-splice-gap", - "value": "150" - }, - { - "name": "Mapper.max-splice-olap", - "value": "16" - }, - { - "name": "Mapper.min-intron-bases", - "value": "20" - }, - { - "name": "Mapper.min-trim-bases", - "value": "0" - }, - { - "name": "Mapper.n-base-qual", - "value": "2" - }, - { - "name": "Mapper.polyg-cutoff", - "value": "0" - }, - { - "name": "Mapper.qual-cutoff", - "value": "0" - }, - { - "name": "Mapper.read-edge-seeds", - "value": "0" - }, - { - "name": "Mapper.rna-filt-ratio", - "value": "25" - }, - { - "name": "Mapper.rna-max-covg-gap", - "value": "150" - }, - { - "name": "Mapper.rna-max-recurs", - "value": "65536" - }, - { - "name": "Mapper.rna-span-log-min", - "value": "13" - }, - { - "name": "Mapper.seed-density", - "value": "0.5" - }, - { - "name": "Mapper.seed-max-age", - "value": "31" - }, - { - "name": "Mapper.seed-old-age", - "value": "9" - }, - { - "name": "Mapper.splice-olap-adj", - "value": "4" - }, - { - "name": "Mapper.trace-mode", - "value": "0" - }, - { - "name": "Mapper.trace-offset", - "value": "0" - }, - { - "name": "Mapper.trace-read-id", - "value": "0" - }, - { - "name": "append-read-index-to-name", - "value": "false" - }, - { - "name": "assert-valid-cigar", - "value": "false" - }, - { - "name": "autodetect-reference-validate", - "value": "false" - }, - { - "name": "bam2dbam-threads", - "value": "12" - }, - { - "name": "bin-split-target-size", - "value": "104857600" - }, - { - "name": "bin-split-threshold", - "value": "7158278826" - }, - { - "name": "bin_memory", - "value": "21474836480" - }, - { - "name": "binner-use-odirect", - "value": "true" - }, - { - "name": "bqsr-context-low-quality-tail", - "value": "2" - }, - { - "name": "bqsr-cycle-indel-context", - "value": "3" - }, - { - "name": "bqsr-cycle-mismatch-context", - "value": "2" - }, - { - "name": "bqsr-emit-indel-tags", - "value": "true" - }, - { - "name": "bqsr-enable-recal-indels", - "value": "true" - }, - { - "name": "bqsr-match-gatk", - "value": "true" - }, - { - "name": "bqsr-max-cycle-value", - "value": "500" - }, - { - "name": "build-hash-table", - "value": "true" - }, - { - "name": "c2s_aligner_packet_size", - "value": "524288" - }, - { - "name": "c2s_aligner_pool_size", - "value": "64" - }, - { - "name": "c2s_decomp_packet_size", - "value": "1048576" - }, - { - "name": "c2s_decomp_pool_size", - "value": "64" - }, - { - "name": "c2s_graph_packet_size", - "value": "65536" - }, - { - "name": "c2s_graph_pool_size", - "value": "256" - }, - { - "name": "c2s_hmm_packet_size", - "value": "4096" - }, - { - "name": "c2s_hmm_pool_size", - "value": "128" - }, - { - "name": "c2s_smw_packet_size", - "value": "4096" - }, - { - "name": "c2s_smw_pool_size", - "value": "128" - }, - { - "name": "cgvcf-num-file-scan-threads", - "value": "8" - }, - { - "name": "cgvcf-save-tmp-files", - "value": "false" - }, - { - "name": "cgvcf-split-chromosomes", - "value": "false" - }, - { - "name": "combine-samples-by-name", - "value": "false" - }, - { - "name": "credentials-1", - "value": "" - }, - { - "name": "credentials-2", - "value": "" - }, - { - "name": "credentials-3", - "value": "" - }, - { - "name": "dbam2bam_threads", - "value": "32" - }, - { - "name": "debug", - "value": "0" - }, - { - "name": "disable_reg_validation", - "value": "0" - }, - { - "name": "distinct-dbam-input-format", - "value": "true" - }, - { - "name": "dump-hang-diag-first", - "value": "true" - }, - { - "name": "dump-map-align-registers", - "value": "0" - }, - { - "name": "dump_config", - "value": "0" - }, - { - "name": "dump_registers", - "value": "0" - }, - { - "name": "dupmark-version", - "value": "sort" - }, - { - "name": "echo_aligner_log", - "value": "0" - }, - { - "name": "echo_general_log", - "value": "0" - }, - { - "name": "echo_mapper_log", - "value": "0" - }, - { - "name": "enable-auto-multifile", - "value": "true" - }, - { - "name": "enable-bqsr", - "value": "false" - }, - { - "name": "enable-deterministic-sort", - "value": "true" - }, - { - "name": "enable-duplicate-marking", - "value": "false" - }, - { - "name": "enable-hang-diag", - "value": "true" - }, - { - "name": "enable-http-server", - "value": "false" - }, - { - "name": "enable-map-align", - "value": "true" - }, - { - "name": "enable-methylation-calling", - "value": "true" - }, - { - "name": "enable-pstack", - "value": "true" - }, - { - "name": "enable-public-bitstream", - "value": "false" - }, - { - "name": "enable-rna-quantification", - "value": "false" - }, - { - "name": "enable-sampling", - "value": "true" - }, - { - "name": "enable-single-cell-rna", - "value": "false" - }, - { - "name": "enable-sort", - "value": "true" - }, - { - "name": "enable-spin", - "value": "false" - }, - { - "name": "enable-umi-stat-estimator", - "value": "false" - }, - { - "name": "enable-variant-caller", - "value": "false" - }, - { - "name": "enable-vcf-indexing", - "value": "true" - }, - { - "name": "enable-watchdog", - "value": "true" - }, - { - "name": "enable-write-input-dbam", - "value": "false" - }, - { - "name": "evict_all_intermediate_results", - "value": "false" - }, - { - "name": "fastq-n-quality", - "value": "2" - }, - { - "name": "fastq-offset", - "value": "33" - }, - { - "name": "fastq2dbam_ratio", - "value": "2" - }, - { - "name": "fastq_block_size", - "value": "1048576" - }, - { - "name": "fastq_pool_size", - "value": "4" - }, - { - "name": "fastqc-adapter-file", - "value": "adapter_sequences.fasta" - }, - { - "name": "fastqc-granularity", - "value": "7" - }, - { - "name": "fastqc-only", - "value": "false" - }, - { - "name": "filter-flags-from-output", - "value": "0" - }, - { - "name": "gc-metrics-cover-percent", - "value": "75" - }, - { - "name": "gc-metrics-enable", - "value": "false" - }, - { - "name": "gc-metrics-num-bins", - "value": "5" - }, - { - "name": "gc-metrics-only-covered", - "value": "false" - }, - { - "name": "gc-metrics-window-size", - "value": "100" - }, - { - "name": "generate-en-tags", - "value": "false" - }, - { - "name": "generate-md-tags", - "value": "false" - }, - { - "name": "generate-xq-tags", - "value": "true" - }, - { - "name": "generate-zs-tags", - "value": "false" - }, - { - "name": "ht-anchor-bin-bits", - "value": "0" - }, - { - "name": "ht-cost-coeff-seed-freq", - "value": "0.5" - }, - { - "name": "ht-cost-coeff-seed-len", - "value": "1" - }, - { - "name": "ht-cost-penalty", - "value": "0" - }, - { - "name": "ht-cost-penalty-incr", - "value": "0.69999999999999996" - }, - { - "name": "ht-crc-extended", - "value": "0" - }, - { - "name": "ht-crc-primary", - "value": "0" - }, - { - "name": "ht-dump-int-params", - "value": "0" - }, - { - "name": "ht-ext-rec-cost", - "value": "4" - }, - { - "name": "ht-max-dec-factor", - "value": "1" - }, - { - "name": "ht-max-ext-incr", - "value": "12" - }, - { - "name": "ht-max-ext-seed-len", - "value": "0" - }, - { - "name": "ht-max-seed-freq", - "value": "16" - }, - { - "name": "ht-max-seed-freq-len", - "value": "98" - }, - { - "name": "ht-max-table-chunks", - "value": "0" - }, - { - "name": "ht-mem-limit", - "value": "0GB" - }, - { - "name": "ht-methylated", - "value": "false" - }, - { - "name": "ht-min-repair-prob", - "value": "0.20000000000000001" - }, - { - "name": "ht-override-size-check", - "value": "0" - }, - { - "name": "ht-pri-max-seed-freq", - "value": "0" - }, - { - "name": "ht-rand-hit-extend", - "value": "8" - }, - { - "name": "ht-rand-hit-hifreq", - "value": "1" - }, - { - "name": "ht-ref-seed-interval", - "value": "1" - }, - { - "name": "ht-reference", - "value": "tiny.fasta" - }, - { - "name": "ht-repair-strategy", - "value": "0" - }, - { - "name": "ht-seed-len", - "value": "21" - }, - { - "name": "ht-size", - "value": "0GB" - }, - { - "name": "ht-soft-seed-freq-cap", - "value": "12" - }, - { - "name": "ht-target-seed-freq", - "value": "4" - }, - { - "name": "ht-test-only", - "value": "0" - }, - { - "name": "ht-write-hash-bin", - "value": "0" - }, - { - "name": "http-server-port", - "value": "7993" - }, - { - "name": "input-qname-suffix-delimiter", - "value": "\/" - }, - { - "name": "linkedreads-correction-table1", - "value": "linkedreads_corrections_1.txt" - }, - { - "name": "linkedreads-correction-table2", - "value": "linkedreads_corrections_2.txt" - }, - { - "name": "linkedreads-correction-table3", - "value": "linkedreads_corrections_3.txt" - }, - { - "name": "linkedreads-enable", - "value": "false" - }, - { - "name": "logfile_prefix", - "value": "\/opt\/edico\/logs\/" - }, - { - "name": "mapper_cigar", - "value": "0" - }, - { - "name": "max_bin_size", - "value": "943718400" - }, - { - "name": "max_ios_inflight", - "value": "1024" - }, - { - "name": "methylation-TAPS", - "value": "false" - }, - { - "name": "methylation-generate-cytosine-report", - "value": "false" - }, - { - "name": "methylation-generate-mbias-report", - "value": "false" - }, - { - "name": "methylation-match-bismark", - "value": "false" - }, - { - "name": "methylation-protocol", - "value": "none" - }, - { - "name": "methylation-reports-only", - "value": "false" - }, - { - "name": "min-predicted-output-gb", - "value": "200" - }, - { - "name": "multiplier", - "value": "1" - }, - { - "name": "no-reset", - "value": "false" - }, - { - "name": "output-directory", - "value": "tiny.v8" - }, - { - "name": "output-format", - "value": "bam" - }, - { - "name": "pair-by-name", - "value": "true" - }, - { - "name": "pair-suffix-delimiter", - "value": "\/" - }, - { - "name": "partition-on-compression-bottleneck", - "value": "false" - }, - { - "name": "pe-stats-continuous-update", - "value": "false" - }, - { - "name": "pe-stats-interval-delay", - "value": "5" - }, - { - "name": "pe-stats-interval-memory", - "value": "10" - }, - { - "name": "pe-stats-interval-size", - "value": "25000" - }, - { - "name": "pe-stats-sample-size", - "value": "100000" - }, - { - "name": "pe-stats-update-log-only", - "value": "false" - }, - { - "name": "preserve-bqsr-tags", - "value": "false" - }, - { - "name": "preserve-map-align-order", - "value": "false" - }, - { - "name": "qc-indel-denovo-quality-threshold", - "value": "0.02" - }, - { - "name": "qc-snp-denovo-quality-threshold", - "value": "0.050000000000000003" - }, - { - "name": "read-trimmers", - "value": "none" - }, - { - "name": "recordset-memory", - "value": "1073741824" - }, - { - "name": "reg_errors_are_warnings", - "value": "0" - }, - { - "name": "remove-duplicates", - "value": "false" - }, - { - "name": "repeat-genotype-enable", - "value": "false" - }, - { - "name": "repeat-genotype-min-anchor-mapq", - "value": "60" - }, - { - "name": "repeat-genotype-min-baseq", - "value": "20" - }, - { - "name": "repeat-genotype-min-score", - "value": "0.90000000000000002" - }, - { - "name": "repeat-genotype-read-depth", - "value": "0" - }, - { - "name": "repeat-genotype-read-length", - "value": "0" - }, - { - "name": "repeat-genotype-region-extension-length", - "value": "1000" - }, - { - "name": "repeat-genotype-skip-unaligned", - "value": "true" - }, - { - "name": "repeat-genotype-specs", - "value": "" - }, - { - "name": "rna-ann-sj-min-len", - "value": "6" - }, - { - "name": "rna-cv-min-expression", - "value": "0" - }, - { - "name": "rna-gf-aggressive-filters", - "value": "false" - }, - { - "name": "rna-gf-blast-pairs", - "value": "" - }, - { - "name": "rna-gf-coverage-lookup-window", - "value": "1000" - }, - { - "name": "rna-gf-enriched-genes", - "value": "" - }, - { - "name": "rna-gf-enriched-only", - "value": "false" - }, - { - "name": "rna-gf-exon-snap", - "value": "50" - }, - { - "name": "rna-gf-mate-overhang", - "value": "8" - }, - { - "name": "rna-gf-max-partners", - "value": "3" - }, - { - "name": "rna-gf-merge-calls", - "value": "true" - }, - { - "name": "rna-gf-min-alt-to-ref", - "value": "0.00999999978" - }, - { - "name": "rna-gf-min-anchor", - "value": "12" - }, - { - "name": "rna-gf-min-blast-pairs-eval", - "value": "1e-100" - }, - { - "name": "rna-gf-min-breakpoint-mapq", - "value": "20" - }, - { - "name": "rna-gf-min-cis-distance", - "value": "200000" - }, - { - "name": "rna-gf-min-covered-bases", - "value": "125" - }, - { - "name": "rna-gf-min-covered-bases-uncaptured", - "value": "79" - }, - { - "name": "rna-gf-min-neighbor-dist", - "value": "15" - }, - { - "name": "rna-gf-min-score", - "value": "0.5" - }, - { - "name": "rna-gf-min-score-ratio", - "value": "0.150000006" - }, - { - "name": "rna-gf-min-support", - "value": "2" - }, - { - "name": "rna-gf-min-support-be", - "value": "10" - }, - { - "name": "rna-gf-min-unique-alignments", - "value": "2" - }, - { - "name": "rna-gf-num-threads", - "value": "4" - }, - { - "name": "rna-gf-ref-anchor", - "value": "8" - }, - { - "name": "rna-gf-restrict-genes", - "value": "true" - }, - { - "name": "rna-gf-score-model", - "value": "" - }, - { - "name": "rna-mapq-unique", - "value": "0" - }, - { - "name": "rna-quantification-fld-max", - "value": "1000" - }, - { - "name": "rna-quantification-fld-mean", - "value": "250" - }, - { - "name": "rna-quantification-fld-sd", - "value": "25" - }, - { - "name": "rna-quantification-full-concordance", - "value": "false" - }, - { - "name": "rna-quantification-gc-bias", - "value": "true" - }, - { - "name": "rna-quantification-inference-max", - "value": "10000" - }, - { - "name": "rna-quantification-inference-min", - "value": "100" - }, - { - "name": "rna-quantification-init-uniform", - "value": "0" - }, - { - "name": "rna-quantification-library-type", - "value": "A" - }, - { - "name": "rna-quantification-tlen-min", - "value": "500" - }, - { - "name": "rna-quantification-use-em", - "value": "0" - }, - { - "name": "rna_aligner_buffer_size", - "value": "67108864" - }, - { - "name": "rna_aligner_buffers", - "value": "0" - }, - { - "name": "s2c_dbam_block_size", - "value": "65536" - }, - { - "name": "s2c_dbam_pool_size", - "value": "32" - }, - { - "name": "s2c_decomp_block_size", - "value": "262144" - }, - { - "name": "s2c_decomp_pool_size", - "value": "64" - }, - { - "name": "s2c_graph_block_size", - "value": "16384" - }, - { - "name": "s2c_graph_pool_size", - "value": "256" - }, - { - "name": "s2c_hmm_block_size", - "value": "16384" - }, - { - "name": "s2c_hmm_pool_size", - "value": "16384" - }, - { - "name": "s2c_phase1_packet_size", - "value": "16384" - }, - { - "name": "s2c_phase2_packet_size", - "value": "16384" - }, - { - "name": "s2c_smw_block_size", - "value": "16384" - }, - { - "name": "s2c_smw_pool_size", - "value": "2048" - }, - { - "name": "single-cell-barcode", - "value": "16" - }, - { - "name": "single-cell-count-introns", - "value": "false" - }, - { - "name": "single-cell-global-umi", - "value": "false" - }, - { - "name": "single-cell-type", - "value": "simple" - }, - { - "name": "single-cell-umi", - "value": "8" - }, - { - "name": "soft-read-trimmers", - "value": "polyg" - }, - { - "name": "sort_buffer_size", - "value": "1048576" - }, - { - "name": "stop-at-read", - "value": "0" - }, - { - "name": "strip-input-qname-suffixes", - "value": "true" - }, - { - "name": "sv-denovo-threshold", - "value": "20" - }, - { - "name": "sv-enable-rrm-for-insertions-in-cancer-calling-modes", - "value": "true" - }, - { - "name": "sv-enable-rrm-for-insertions-in-germline-calling-modes", - "value": "true" - }, - { - "name": "sv-generate-evidence-bam", - "value": "false" - }, - { - "name": "sv-graph-node-max-edge-count", - "value": "10" - }, - { - "name": "sv-hyper-sensitivity", - "value": "false" - }, - { - "name": "sv-min-candidate-spanning-count", - "value": "3" - }, - { - "name": "sv-min-candidate-variant-size", - "value": "8" - }, - { - "name": "sv-min-diploid-variant-score", - "value": "10" - }, - { - "name": "sv-min-edge-observations", - "value": "3" - }, - { - "name": "sv-min-pass-diploid-gt-score", - "value": "15" - }, - { - "name": "sv-min-pass-diploid-variant-score", - "value": "20" - }, - { - "name": "sv-min-pass-somatic-score", - "value": "30" - }, - { - "name": "sv-min-scored-variant-size", - "value": "50" - }, - { - "name": "sv-min-somatic-score", - "value": "10" - }, - { - "name": "sv-mobile-element-sequences", - "value": "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACGAGGTCAGGAGATCGAGACCATCCCGGCTAAAACGGTGAAACCCCGTCTCTACTAAAAATACAAAAAATTAGCCGGGCGTAGTGGCGGGCGCCTGTAGTCCCAGCTACTTGGGAGGCTGAGGCAGGAGAATGGCGTGAACCCGGGAGGCGGAGCTTGCAGTGAGCCGAGATCCCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTC GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGTGGATCATGAGGTCAGGAGATCGAGACCATCCTGGCTAACAAGGTGAAACCCCGTCTCTACTAAAAATACAAAAAATTAGCCGGGCGCGGTGGCGGGCGCCTGTAGTCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATGGCGTGAACCCGGGAAGCGGAGCTTGCAGTGAGCCGAGATTGCGCCACTGCAGTCCGCAGTCCGGCCTGGGCGACAGAGCGAGACTCCGTCTC " - }, - { - "name": "sv-rna-min-candidate-variant-size", - "value": "1000" - }, - { - "name": "sv-use-overlap-pair-evidence", - "value": "false" - }, - { - "name": "trim-disable-mapping", - "value": "false" - }, - { - "name": "trim-filter-dummy-len", - "value": "10" - }, - { - "name": "trim-filter-set-flag", - "value": "true" - }, - { - "name": "trim-min-len-read1", - "value": "20" - }, - { - "name": "trim-min-len-read2", - "value": "20" - }, - { - "name": "trim-polyg-early-exit-threshold", - "value": "-500" - }, - { - "name": "trim-polyg-g-score-r1-3prime", - "value": "15" - }, - { - "name": "trim-polyg-g-score-r1-5prime", - "value": "0" - }, - { - "name": "trim-polyg-g-score-r2-3prime", - "value": "15" - }, - { - "name": "trim-polyg-g-score-r2-5prime", - "value": "0" - }, - { - "name": "trim-polyg-kmer-len", - "value": "25" - }, - { - "name": "trim-polyg-kmer-non-g", - "value": "2" - }, - { - "name": "trim-polyg-min-trim-r1-3prime", - "value": "6" - }, - { - "name": "trim-polyg-min-trim-r1-5prime", - "value": "6" - }, - { - "name": "trim-polyg-min-trim-r2-3prime", - "value": "6" - }, - { - "name": "trim-polyg-min-trim-r2-5prime", - "value": "6" - }, - { - "name": "umi-base-representation-min-ratio", - "value": "0.5" - }, - { - "name": "umi-correction-scheme", - "value": "lookup" - }, - { - "name": "umi-correction-table", - "value": "umi_correction_table.txt" - }, - { - "name": "umi-enable", - "value": "false" - }, - { - "name": "umi-enable-contextual-corrections", - "value": "true" - }, - { - "name": "umi-enable-duplex-merging", - "value": "true" - }, - { - "name": "umi-enable-probability-model-merging", - "value": "false" - }, - { - "name": "umi-enable-shift-corrections", - "value": "true" - }, - { - "name": "umi-enable-trimming", - "value": "true" - }, - { - "name": "umi-end-mask-length", - "value": "3" - }, - { - "name": "umi-generate-bam-tags", - "value": "true" - }, - { - "name": "umi-masked-base-qual", - "value": "1" - }, - { - "name": "umi-max-base-quality", - "value": "63" - }, - { - "name": "umi-mem-throttle-gb", - "value": "30" - }, - { - "name": "umi-min-map-quality", - "value": "0" - }, - { - "name": "umi-min-reads-per-region", - "value": "4096" - }, - { - "name": "umi-min-supporting-reads", - "value": "2" - }, - { - "name": "umi-padding", - "value": "A" - }, - { - "name": "umi-preserve-input-tags", - "value": "false" - }, - { - "name": "umi-probability-merging-duplex-merging-thres", - "value": "1" - }, - { - "name": "umi-probability-merging-max-transition-ratio", - "value": "10000" - }, - { - "name": "umi-probability-merging-min-isize-freq", - "value": "0.001" - }, - { - "name": "umi-probability-merging-seq-error", - "value": "0.001" - }, - { - "name": "umi-probability-merging-simplex-fuzzy-merging-thres", - "value": "1" - }, - { - "name": "umi-probability-merging-simplex-merging-thres", - "value": "1" - }, - { - "name": "umi-random-merge-factor", - "value": "2" - }, - { - "name": "umi-read-minority-min-ratio", - "value": "0.5" - }, - { - "name": "umi-soft-clip-ratio", - "value": "0.5" - }, - { - "name": "umi-source", - "value": "qname" - }, - { - "name": "umi-start-mask-length", - "value": "1" - }, - { - "name": "umi-stat-estimation-max-fragment-count", - "value": "30" - }, - { - "name": "umi-stat-estimation-max-fragment-size", - "value": "1000" - }, - { - "name": "umi-stat-estimation-max-interval-number", - "value": "50" - }, - { - "name": "umi-stat-estimation-min-probability-unique-fragment", - "value": "0.998" - }, - { - "name": "umi-stat-estimation-umi-jumping-estimation-method", - "value": "simplex" - }, - { - "name": "umi-trim-allowed-mismatches", - "value": "1" - }, - { - "name": "umi-verbose-metrics", - "value": "false" - }, - { - "name": "use-mock-config", - "value": "false" - }, - { - "name": "vc-active-only", - "value": "false" - }, - { - "name": "vc-decoy-contigs", - "value": "NC_007605 hs37d5 chrUn_KN707*v1_decoy chrUn_JTFH0100*v1_decoy KN707*.1 JTFH0100*.1 chrEBV CMV HBV HCV* HIV* KSHV HTLV* MCV SV40 HPV* " - }, - { - "name": "vc-emit-ref-confidence", - "value": "NONE" - }, - { - "name": "vc-emit-zero-coverage-intervals", - "value": "true" - }, - { - "name": "vc-enable-basecall-filter", - "value": "false" - }, - { - "name": "vc-enable-deterministic-run", - "value": "true" - }, - { - "name": "vc-enable-hw-hmm", - "value": "true" - }, - { - "name": "vc-enable-hw-hmm-dump-receiver-data", - "value": "false" - }, - { - "name": "vc-enable-hw-hmm-dump-sender-data", - "value": "false" - }, - { - "name": "vc-enable-hw-hmm-dump-worker-data", - "value": "false" - }, - { - "name": "vc-enable-hw-smw", - "value": "true" - }, - { - "name": "vc-enable-hw-smw-dump-receiver-data", - "value": "false" - }, - { - "name": "vc-enable-hw-smw-dump-sender-data", - "value": "false" - }, - { - "name": "vc-enable-hw-smw-dump-worker-data", - "value": "false" - }, - { - "name": "vc-hw-hmm-timeout", - "value": "100000" - }, - { - "name": "vc-limit-genomecov-output", - "value": "false" - }, - { - "name": "vc-max-alternate-alleles", - "value": "6" - }, - { - "name": "vc-max-haps-per-job", - "value": "2" - }, - { - "name": "vc-sw-cell-width", - "value": "16" - }, - { - "name": "vc-sw-instruction-set", - "value": "sse2" - }, - { - "name": "vc-sw-mode", - "value": "0" - }, - { - "name": "vqsr-lod-cutoff", - "value": "-5.0" - }, - { - "name": "vqsr-num-gaussians", - "value": "8,2,4,2" - }, - { - "name": "watchdog-active-timeout", - "value": "600" - }, - { - "name": "watchdog-dump-processes", - "value": "true;" - }, - { - "name": "watchdog-exit-on-hang", - "value": "true" - }, - { - "name": "watchdog-freemem-threshold", - "value": "2GB" - }, - { - "name": "watchdog-idle-timeout", - "value": "600" - }, - { - "name": "watchdog-max-threads", - "value": "8388608" - }, - { - "name": "watchdog-poll-interval", - "value": "1" - }, - { - "name": "watchdog-resources-monitored", - "value": "THREADS IO MEMORY " - }, - { - "name": "watchdog-verbose-logging", - "value": "true" - } - ], - "system": { - "dragen_version": "07.021.572.3.7.0-205-gbcf57830", - "nodename": "ussd-tst-drgn33.illumina.com", - "kernel_release": "3.10.0-1062.12.1.el7.x86_64" + "command_line": "/staging/rpetrovski/dragen/objtree_el7-master/bin/dragen --build-hash-table true --ht-reference tiny.fasta --output-directory tiny.v8", + "dragen_config": [ + { + "name": "Aligner.align-direction", + "value": "4" + }, + { + "name": "Aligner.aln-en-mask", + "value": "15" + }, + { + "name": "Aligner.aln-enable", + "value": "1" + }, + { + "name": "Aligner.backtrace-delay", + "value": "8" + }, + { + "name": "Aligner.dedup-min-qual", + "value": "15" + }, + { + "name": "Aligner.disable-lfsr", + "value": "0" + }, + { + "name": "Aligner.en-alt-hap-aln", + "value": "1" + }, + { + "name": "Aligner.en-chimeric-aln", + "value": "1" + }, + { + "name": "Aligner.exon-jump-en", + "value": "1" + }, + { + "name": "Aligner.filt-clip1-pair", + "value": "10" + }, + { + "name": "Aligner.filt-clip1-unpr", + "value": "-5" + }, + { + "name": "Aligner.filt-clip2-pair", + "value": "5" + }, + { + "name": "Aligner.filt-clip2-unpr", + "value": "-10" + }, + { + "name": "Aligner.filt-min-qual", + "value": "0" + }, + { + "name": "Aligner.fix-overlap-mapq", + "value": "0" + }, + { + "name": "Aligner.gap-ext-pen", + "value": "1" + }, + { + "name": "Aligner.gap-open-pen", + "value": "6" + }, + { + "name": "Aligner.global", + "value": "0" + }, + { + "name": "Aligner.hard-clips", + "value": "6" + }, + { + "name": "Aligner.hw-mapq-max", + "value": "250" + }, + { + "name": "Aligner.leftalign-mode", + "value": "2" + }, + { + "name": "Aligner.mapq-coeff", + "value": "152" + }, + { + "name": "Aligner.mapq-floor-1snp", + "value": "0" + }, + { + "name": "Aligner.mapq-max", + "value": "60" + }, + { + "name": "Aligner.mapq-min-len", + "value": "50" + }, + { + "name": "Aligner.mapq-strict-sjs", + "value": "0" + }, + { + "name": "Aligner.match-score", + "value": "1" + }, + { + "name": "Aligner.max-rescues", + "value": "1023" + }, + { + "name": "Aligner.max-stitch-olap", + "value": "48" + }, + { + "name": "Aligner.min-overhang", + "value": "6" + }, + { + "name": "Aligner.min-overhang-ann", + "value": "6" + }, + { + "name": "Aligner.min-score-coeff", + "value": "0.0" + }, + { + "name": "Aligner.mismatch-pen", + "value": "4" + }, + { + "name": "Aligner.no-align-score", + "value": "-8388608" + }, + { + "name": "Aligner.no-ambig-strand", + "value": "1" + }, + { + "name": "Aligner.no-noncan-motifs", + "value": "1" + }, + { + "name": "Aligner.no-unclip-score", + "value": "1" + }, + { + "name": "Aligner.no-unpaired", + "value": "0" + }, + { + "name": "Aligner.paired-mate-info", + "value": "1" + }, + { + "name": "Aligner.pe-max-penalty", + "value": "255" + }, + { + "name": "Aligner.pe-orientation", + "value": "0" + }, + { + "name": "Aligner.pe-sample-max-insert", + "value": "65535" + }, + { + "name": "Aligner.resc-ifpair-len", + "value": "48" + }, + { + "name": "Aligner.resc-nopair-len", + "value": "0" + }, + { + "name": "Aligner.rescue-ceil-factor", + "value": "3" + }, + { + "name": "Aligner.rescue-hifreq", + "value": "0" + }, + { + "name": "Aligner.rescue-kmer-len", + "value": "32" + }, + { + "name": "Aligner.rescue-max-snps", + "value": "7" + }, + { + "name": "Aligner.rna-max-insert", + "value": "4000000" + }, + { + "name": "Aligner.rna-pair-pen-rat", + "value": "5.71875" + }, + { + "name": "Aligner.sample-mapq0", + "value": "1" + }, + { + "name": "Aligner.sec-aligns", + "value": "0" + }, + { + "name": "Aligner.sec-aligns-hard", + "value": "0" + }, + { + "name": "Aligner.sec-phred-delta", + "value": "0" + }, + { + "name": "Aligner.sec-score-delta", + "value": "0" + }, + { + "name": "Aligner.supp-aligns", + "value": "3" + }, + { + "name": "Aligner.supp-as-sec", + "value": "0" + }, + { + "name": "Aligner.sw-all", + "value": "0" + }, + { + "name": "Aligner.sw-burst-diffs", + "value": "1" + }, + { + "name": "Aligner.sw-early-max", + "value": "256" + }, + { + "name": "Aligner.sw-extra-intvl", + "value": "1" + }, + { + "name": "Aligner.unclip-score", + "value": "5" + }, + { + "name": "Aligner.unpaired-pen", + "value": "80" + }, + { + "name": "Aligner.xs-pair-penalty", + "value": "25" + }, + { + "name": "Mapper.adapter-times", + "value": "0" + }, + { + "name": "Mapper.ann-sj-max-indel", + "value": "10" + }, + { + "name": "Mapper.chain-diam-lim", + "value": "8" + }, + { + "name": "Mapper.chain-rad-lim", + "value": "5" + }, + { + "name": "Mapper.cut-bases", + "value": "0" + }, + { + "name": "Mapper.dark-base", + "value": "2" + }, + { + "name": "Mapper.edit-chain-limit", + "value": "29" + }, + { + "name": "Mapper.edit-mode", + "value": "0" + }, + { + "name": "Mapper.edit-read-len", + "value": "100" + }, + { + "name": "Mapper.edit-seed-num", + "value": "6" + }, + { + "name": "Mapper.filt-good-qual", + "value": "0" + }, + { + "name": "Mapper.filter-len-ratio", + "value": "4" + }, + { + "name": "Mapper.intvl-max-hits", + "value": "16" + }, + { + "name": "Mapper.intvl-min-chains", + "value": "8" + }, + { + "name": "Mapper.intvl-sample-hits", + "value": "16" + }, + { + "name": "Mapper.intvl-seed-length", + "value": "60" + }, + { + "name": "Mapper.intvl-seed-longer", + "value": "8" + }, + { + "name": "Mapper.intvl-target-hits", + "value": "32" + }, + { + "name": "Mapper.map-orientations", + "value": "0" + }, + { + "name": "Mapper.max-dram-reqs", + "value": "0" + }, + { + "name": "Mapper.max-hifreq-hits", + "value": "16" + }, + { + "name": "Mapper.max-intron-bases", + "value": "200000" + }, + { + "name": "Mapper.max-lowq-bases", + "value": "4294967295" + }, + { + "name": "Mapper.max-lowq-ratio", + "value": "4294967295" + }, + { + "name": "Mapper.max-read-len", + "value": "4294967295" + }, + { + "name": "Mapper.max-seed-chains", + "value": "511" + }, + { + "name": "Mapper.max-splice-gap", + "value": "150" + }, + { + "name": "Mapper.max-splice-olap", + "value": "16" + }, + { + "name": "Mapper.min-intron-bases", + "value": "20" + }, + { + "name": "Mapper.min-trim-bases", + "value": "0" + }, + { + "name": "Mapper.n-base-qual", + "value": "2" + }, + { + "name": "Mapper.polyg-cutoff", + "value": "0" + }, + { + "name": "Mapper.qual-cutoff", + "value": "0" + }, + { + "name": "Mapper.read-edge-seeds", + "value": "0" + }, + { + "name": "Mapper.rna-filt-ratio", + "value": "25" + }, + { + "name": "Mapper.rna-max-covg-gap", + "value": "150" + }, + { + "name": "Mapper.rna-max-recurs", + "value": "65536" + }, + { + "name": "Mapper.rna-span-log-min", + "value": "13" + }, + { + "name": "Mapper.seed-density", + "value": "0.5" + }, + { + "name": "Mapper.seed-max-age", + "value": "31" + }, + { + "name": "Mapper.seed-old-age", + "value": "9" + }, + { + "name": "Mapper.splice-olap-adj", + "value": "4" + }, + { + "name": "Mapper.trace-mode", + "value": "0" + }, + { + "name": "Mapper.trace-offset", + "value": "0" + }, + { + "name": "Mapper.trace-read-id", + "value": "0" + }, + { + "name": "append-read-index-to-name", + "value": "false" + }, + { + "name": "assert-valid-cigar", + "value": "false" + }, + { + "name": "autodetect-reference-validate", + "value": "false" + }, + { + "name": "bam2dbam-threads", + "value": "12" + }, + { + "name": "bin-split-target-size", + "value": "104857600" + }, + { + "name": "bin-split-threshold", + "value": "7158278826" + }, + { + "name": "bin_memory", + "value": "21474836480" + }, + { + "name": "binner-use-odirect", + "value": "true" + }, + { + "name": "bqsr-context-low-quality-tail", + "value": "2" + }, + { + "name": "bqsr-cycle-indel-context", + "value": "3" + }, + { + "name": "bqsr-cycle-mismatch-context", + "value": "2" + }, + { + "name": "bqsr-emit-indel-tags", + "value": "true" + }, + { + "name": "bqsr-enable-recal-indels", + "value": "true" + }, + { + "name": "bqsr-match-gatk", + "value": "true" + }, + { + "name": "bqsr-max-cycle-value", + "value": "500" + }, + { + "name": "build-hash-table", + "value": "true" + }, + { + "name": "c2s_aligner_packet_size", + "value": "524288" + }, + { + "name": "c2s_aligner_pool_size", + "value": "64" + }, + { + "name": "c2s_decomp_packet_size", + "value": "1048576" + }, + { + "name": "c2s_decomp_pool_size", + "value": "64" + }, + { + "name": "c2s_graph_packet_size", + "value": "65536" + }, + { + "name": "c2s_graph_pool_size", + "value": "256" + }, + { + "name": "c2s_hmm_packet_size", + "value": "4096" + }, + { + "name": "c2s_hmm_pool_size", + "value": "128" + }, + { + "name": "c2s_smw_packet_size", + "value": "4096" + }, + { + "name": "c2s_smw_pool_size", + "value": "128" + }, + { + "name": "cgvcf-num-file-scan-threads", + "value": "8" + }, + { + "name": "cgvcf-save-tmp-files", + "value": "false" + }, + { + "name": "cgvcf-split-chromosomes", + "value": "false" + }, + { + "name": "combine-samples-by-name", + "value": "false" + }, + { + "name": "credentials-1", + "value": "" + }, + { + "name": "credentials-2", + "value": "" + }, + { + "name": "credentials-3", + "value": "" + }, + { + "name": "dbam2bam_threads", + "value": "32" + }, + { + "name": "debug", + "value": "0" + }, + { + "name": "disable_reg_validation", + "value": "0" + }, + { + "name": "distinct-dbam-input-format", + "value": "true" + }, + { + "name": "dump-hang-diag-first", + "value": "true" + }, + { + "name": "dump-map-align-registers", + "value": "0" + }, + { + "name": "dump_config", + "value": "0" + }, + { + "name": "dump_registers", + "value": "0" + }, + { + "name": "dupmark-version", + "value": "sort" + }, + { + "name": "echo_aligner_log", + "value": "0" + }, + { + "name": "echo_general_log", + "value": "0" + }, + { + "name": "echo_mapper_log", + "value": "0" + }, + { + "name": "enable-auto-multifile", + "value": "true" + }, + { + "name": "enable-bqsr", + "value": "false" + }, + { + "name": "enable-deterministic-sort", + "value": "true" + }, + { + "name": "enable-duplicate-marking", + "value": "false" + }, + { + "name": "enable-hang-diag", + "value": "true" + }, + { + "name": "enable-http-server", + "value": "false" + }, + { + "name": "enable-map-align", + "value": "true" + }, + { + "name": "enable-methylation-calling", + "value": "true" + }, + { + "name": "enable-pstack", + "value": "true" + }, + { + "name": "enable-public-bitstream", + "value": "false" + }, + { + "name": "enable-rna-quantification", + "value": "false" + }, + { + "name": "enable-sampling", + "value": "true" + }, + { + "name": "enable-single-cell-rna", + "value": "false" + }, + { + "name": "enable-sort", + "value": "true" + }, + { + "name": "enable-spin", + "value": "false" + }, + { + "name": "enable-umi-stat-estimator", + "value": "false" + }, + { + "name": "enable-variant-caller", + "value": "false" + }, + { + "name": "enable-vcf-indexing", + "value": "true" + }, + { + "name": "enable-watchdog", + "value": "true" + }, + { + "name": "enable-write-input-dbam", + "value": "false" + }, + { + "name": "evict_all_intermediate_results", + "value": "false" + }, + { + "name": "fastq-n-quality", + "value": "2" + }, + { + "name": "fastq-offset", + "value": "33" + }, + { + "name": "fastq2dbam_ratio", + "value": "2" + }, + { + "name": "fastq_block_size", + "value": "1048576" + }, + { + "name": "fastq_pool_size", + "value": "4" + }, + { + "name": "fastqc-adapter-file", + "value": "adapter_sequences.fasta" + }, + { + "name": "fastqc-granularity", + "value": "7" + }, + { + "name": "fastqc-only", + "value": "false" + }, + { + "name": "filter-flags-from-output", + "value": "0" + }, + { + "name": "gc-metrics-cover-percent", + "value": "75" + }, + { + "name": "gc-metrics-enable", + "value": "false" + }, + { + "name": "gc-metrics-num-bins", + "value": "5" + }, + { + "name": "gc-metrics-only-covered", + "value": "false" + }, + { + "name": "gc-metrics-window-size", + "value": "100" + }, + { + "name": "generate-en-tags", + "value": "false" + }, + { + "name": "generate-md-tags", + "value": "false" + }, + { + "name": "generate-xq-tags", + "value": "true" + }, + { + "name": "generate-zs-tags", + "value": "false" + }, + { + "name": "ht-anchor-bin-bits", + "value": "0" + }, + { + "name": "ht-cost-coeff-seed-freq", + "value": "0.5" + }, + { + "name": "ht-cost-coeff-seed-len", + "value": "1" + }, + { + "name": "ht-cost-penalty", + "value": "0" + }, + { + "name": "ht-cost-penalty-incr", + "value": "0.69999999999999996" + }, + { + "name": "ht-crc-extended", + "value": "0" + }, + { + "name": "ht-crc-primary", + "value": "0" + }, + { + "name": "ht-dump-int-params", + "value": "0" + }, + { + "name": "ht-ext-rec-cost", + "value": "4" + }, + { + "name": "ht-max-dec-factor", + "value": "1" + }, + { + "name": "ht-max-ext-incr", + "value": "12" + }, + { + "name": "ht-max-ext-seed-len", + "value": "0" + }, + { + "name": "ht-max-seed-freq", + "value": "16" + }, + { + "name": "ht-max-seed-freq-len", + "value": "98" + }, + { + "name": "ht-max-table-chunks", + "value": "0" + }, + { + "name": "ht-mem-limit", + "value": "0GB" + }, + { + "name": "ht-methylated", + "value": "false" + }, + { + "name": "ht-min-repair-prob", + "value": "0.20000000000000001" + }, + { + "name": "ht-override-size-check", + "value": "0" + }, + { + "name": "ht-pri-max-seed-freq", + "value": "0" + }, + { + "name": "ht-rand-hit-extend", + "value": "8" + }, + { + "name": "ht-rand-hit-hifreq", + "value": "1" + }, + { + "name": "ht-ref-seed-interval", + "value": "1" + }, + { + "name": "ht-reference", + "value": "tiny.fasta" + }, + { + "name": "ht-repair-strategy", + "value": "0" + }, + { + "name": "ht-seed-len", + "value": "21" + }, + { + "name": "ht-size", + "value": "0GB" + }, + { + "name": "ht-soft-seed-freq-cap", + "value": "12" + }, + { + "name": "ht-target-seed-freq", + "value": "4" + }, + { + "name": "ht-test-only", + "value": "0" + }, + { + "name": "ht-write-hash-bin", + "value": "0" + }, + { + "name": "http-server-port", + "value": "7993" + }, + { + "name": "input-qname-suffix-delimiter", + "value": "/" + }, + { + "name": "linkedreads-correction-table1", + "value": "linkedreads_corrections_1.txt" + }, + { + "name": "linkedreads-correction-table2", + "value": "linkedreads_corrections_2.txt" + }, + { + "name": "linkedreads-correction-table3", + "value": "linkedreads_corrections_3.txt" + }, + { + "name": "linkedreads-enable", + "value": "false" + }, + { + "name": "logfile_prefix", + "value": "/opt/edico/logs/" + }, + { + "name": "mapper_cigar", + "value": "0" + }, + { + "name": "max_bin_size", + "value": "943718400" + }, + { + "name": "max_ios_inflight", + "value": "1024" + }, + { + "name": "methylation-TAPS", + "value": "false" + }, + { + "name": "methylation-generate-cytosine-report", + "value": "false" + }, + { + "name": "methylation-generate-mbias-report", + "value": "false" + }, + { + "name": "methylation-match-bismark", + "value": "false" + }, + { + "name": "methylation-protocol", + "value": "none" + }, + { + "name": "methylation-reports-only", + "value": "false" + }, + { + "name": "min-predicted-output-gb", + "value": "200" + }, + { + "name": "multiplier", + "value": "1" + }, + { + "name": "no-reset", + "value": "false" + }, + { + "name": "output-directory", + "value": "tiny.v8" + }, + { + "name": "output-format", + "value": "bam" + }, + { + "name": "pair-by-name", + "value": "true" + }, + { + "name": "pair-suffix-delimiter", + "value": "/" + }, + { + "name": "partition-on-compression-bottleneck", + "value": "false" + }, + { + "name": "pe-stats-continuous-update", + "value": "false" + }, + { + "name": "pe-stats-interval-delay", + "value": "5" + }, + { + "name": "pe-stats-interval-memory", + "value": "10" + }, + { + "name": "pe-stats-interval-size", + "value": "25000" + }, + { + "name": "pe-stats-sample-size", + "value": "100000" + }, + { + "name": "pe-stats-update-log-only", + "value": "false" + }, + { + "name": "preserve-bqsr-tags", + "value": "false" + }, + { + "name": "preserve-map-align-order", + "value": "false" + }, + { + "name": "qc-indel-denovo-quality-threshold", + "value": "0.02" + }, + { + "name": "qc-snp-denovo-quality-threshold", + "value": "0.050000000000000003" + }, + { + "name": "read-trimmers", + "value": "none" + }, + { + "name": "recordset-memory", + "value": "1073741824" + }, + { + "name": "reg_errors_are_warnings", + "value": "0" + }, + { + "name": "remove-duplicates", + "value": "false" + }, + { + "name": "repeat-genotype-enable", + "value": "false" + }, + { + "name": "repeat-genotype-min-anchor-mapq", + "value": "60" + }, + { + "name": "repeat-genotype-min-baseq", + "value": "20" + }, + { + "name": "repeat-genotype-min-score", + "value": "0.90000000000000002" + }, + { + "name": "repeat-genotype-read-depth", + "value": "0" + }, + { + "name": "repeat-genotype-read-length", + "value": "0" + }, + { + "name": "repeat-genotype-region-extension-length", + "value": "1000" + }, + { + "name": "repeat-genotype-skip-unaligned", + "value": "true" + }, + { + "name": "repeat-genotype-specs", + "value": "" + }, + { + "name": "rna-ann-sj-min-len", + "value": "6" + }, + { + "name": "rna-cv-min-expression", + "value": "0" + }, + { + "name": "rna-gf-aggressive-filters", + "value": "false" + }, + { + "name": "rna-gf-blast-pairs", + "value": "" + }, + { + "name": "rna-gf-coverage-lookup-window", + "value": "1000" + }, + { + "name": "rna-gf-enriched-genes", + "value": "" + }, + { + "name": "rna-gf-enriched-only", + "value": "false" + }, + { + "name": "rna-gf-exon-snap", + "value": "50" + }, + { + "name": "rna-gf-mate-overhang", + "value": "8" + }, + { + "name": "rna-gf-max-partners", + "value": "3" + }, + { + "name": "rna-gf-merge-calls", + "value": "true" + }, + { + "name": "rna-gf-min-alt-to-ref", + "value": "0.00999999978" + }, + { + "name": "rna-gf-min-anchor", + "value": "12" + }, + { + "name": "rna-gf-min-blast-pairs-eval", + "value": "1e-100" + }, + { + "name": "rna-gf-min-breakpoint-mapq", + "value": "20" + }, + { + "name": "rna-gf-min-cis-distance", + "value": "200000" + }, + { + "name": "rna-gf-min-covered-bases", + "value": "125" + }, + { + "name": "rna-gf-min-covered-bases-uncaptured", + "value": "79" + }, + { + "name": "rna-gf-min-neighbor-dist", + "value": "15" + }, + { + "name": "rna-gf-min-score", + "value": "0.5" + }, + { + "name": "rna-gf-min-score-ratio", + "value": "0.150000006" + }, + { + "name": "rna-gf-min-support", + "value": "2" + }, + { + "name": "rna-gf-min-support-be", + "value": "10" + }, + { + "name": "rna-gf-min-unique-alignments", + "value": "2" + }, + { + "name": "rna-gf-num-threads", + "value": "4" + }, + { + "name": "rna-gf-ref-anchor", + "value": "8" + }, + { + "name": "rna-gf-restrict-genes", + "value": "true" + }, + { + "name": "rna-gf-score-model", + "value": "" + }, + { + "name": "rna-mapq-unique", + "value": "0" + }, + { + "name": "rna-quantification-fld-max", + "value": "1000" + }, + { + "name": "rna-quantification-fld-mean", + "value": "250" + }, + { + "name": "rna-quantification-fld-sd", + "value": "25" + }, + { + "name": "rna-quantification-full-concordance", + "value": "false" + }, + { + "name": "rna-quantification-gc-bias", + "value": "true" + }, + { + "name": "rna-quantification-inference-max", + "value": "10000" + }, + { + "name": "rna-quantification-inference-min", + "value": "100" + }, + { + "name": "rna-quantification-init-uniform", + "value": "0" + }, + { + "name": "rna-quantification-library-type", + "value": "A" + }, + { + "name": "rna-quantification-tlen-min", + "value": "500" + }, + { + "name": "rna-quantification-use-em", + "value": "0" + }, + { + "name": "rna_aligner_buffer_size", + "value": "67108864" + }, + { + "name": "rna_aligner_buffers", + "value": "0" + }, + { + "name": "s2c_dbam_block_size", + "value": "65536" + }, + { + "name": "s2c_dbam_pool_size", + "value": "32" + }, + { + "name": "s2c_decomp_block_size", + "value": "262144" + }, + { + "name": "s2c_decomp_pool_size", + "value": "64" + }, + { + "name": "s2c_graph_block_size", + "value": "16384" + }, + { + "name": "s2c_graph_pool_size", + "value": "256" + }, + { + "name": "s2c_hmm_block_size", + "value": "16384" + }, + { + "name": "s2c_hmm_pool_size", + "value": "16384" + }, + { + "name": "s2c_phase1_packet_size", + "value": "16384" + }, + { + "name": "s2c_phase2_packet_size", + "value": "16384" + }, + { + "name": "s2c_smw_block_size", + "value": "16384" + }, + { + "name": "s2c_smw_pool_size", + "value": "2048" + }, + { + "name": "single-cell-barcode", + "value": "16" + }, + { + "name": "single-cell-count-introns", + "value": "false" + }, + { + "name": "single-cell-global-umi", + "value": "false" + }, + { + "name": "single-cell-type", + "value": "simple" + }, + { + "name": "single-cell-umi", + "value": "8" + }, + { + "name": "soft-read-trimmers", + "value": "polyg" + }, + { + "name": "sort_buffer_size", + "value": "1048576" + }, + { + "name": "stop-at-read", + "value": "0" + }, + { + "name": "strip-input-qname-suffixes", + "value": "true" + }, + { + "name": "sv-denovo-threshold", + "value": "20" + }, + { + "name": "sv-enable-rrm-for-insertions-in-cancer-calling-modes", + "value": "true" + }, + { + "name": "sv-enable-rrm-for-insertions-in-germline-calling-modes", + "value": "true" + }, + { + "name": "sv-generate-evidence-bam", + "value": "false" + }, + { + "name": "sv-graph-node-max-edge-count", + "value": "10" + }, + { + "name": "sv-hyper-sensitivity", + "value": "false" + }, + { + "name": "sv-min-candidate-spanning-count", + "value": "3" + }, + { + "name": "sv-min-candidate-variant-size", + "value": "8" + }, + { + "name": "sv-min-diploid-variant-score", + "value": "10" + }, + { + "name": "sv-min-edge-observations", + "value": "3" + }, + { + "name": "sv-min-pass-diploid-gt-score", + "value": "15" + }, + { + "name": "sv-min-pass-diploid-variant-score", + "value": "20" + }, + { + "name": "sv-min-pass-somatic-score", + "value": "30" + }, + { + "name": "sv-min-scored-variant-size", + "value": "50" + }, + { + "name": "sv-min-somatic-score", + "value": "10" + }, + { + "name": "sv-mobile-element-sequences", + "value": "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACGAGGTCAGGAGATCGAGACCATCCCGGCTAAAACGGTGAAACCCCGTCTCTACTAAAAATACAAAAAATTAGCCGGGCGTAGTGGCGGGCGCCTGTAGTCCCAGCTACTTGGGAGGCTGAGGCAGGAGAATGGCGTGAACCCGGGAGGCGGAGCTTGCAGTGAGCCGAGATCCCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTC GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGTGGATCATGAGGTCAGGAGATCGAGACCATCCTGGCTAACAAGGTGAAACCCCGTCTCTACTAAAAATACAAAAAATTAGCCGGGCGCGGTGGCGGGCGCCTGTAGTCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATGGCGTGAACCCGGGAAGCGGAGCTTGCAGTGAGCCGAGATTGCGCCACTGCAGTCCGCAGTCCGGCCTGGGCGACAGAGCGAGACTCCGTCTC " + }, + { + "name": "sv-rna-min-candidate-variant-size", + "value": "1000" + }, + { + "name": "sv-use-overlap-pair-evidence", + "value": "false" + }, + { + "name": "trim-disable-mapping", + "value": "false" + }, + { + "name": "trim-filter-dummy-len", + "value": "10" + }, + { + "name": "trim-filter-set-flag", + "value": "true" + }, + { + "name": "trim-min-len-read1", + "value": "20" + }, + { + "name": "trim-min-len-read2", + "value": "20" + }, + { + "name": "trim-polyg-early-exit-threshold", + "value": "-500" + }, + { + "name": "trim-polyg-g-score-r1-3prime", + "value": "15" + }, + { + "name": "trim-polyg-g-score-r1-5prime", + "value": "0" + }, + { + "name": "trim-polyg-g-score-r2-3prime", + "value": "15" + }, + { + "name": "trim-polyg-g-score-r2-5prime", + "value": "0" + }, + { + "name": "trim-polyg-kmer-len", + "value": "25" + }, + { + "name": "trim-polyg-kmer-non-g", + "value": "2" + }, + { + "name": "trim-polyg-min-trim-r1-3prime", + "value": "6" + }, + { + "name": "trim-polyg-min-trim-r1-5prime", + "value": "6" + }, + { + "name": "trim-polyg-min-trim-r2-3prime", + "value": "6" + }, + { + "name": "trim-polyg-min-trim-r2-5prime", + "value": "6" + }, + { + "name": "umi-base-representation-min-ratio", + "value": "0.5" + }, + { + "name": "umi-correction-scheme", + "value": "lookup" + }, + { + "name": "umi-correction-table", + "value": "umi_correction_table.txt" + }, + { + "name": "umi-enable", + "value": "false" + }, + { + "name": "umi-enable-contextual-corrections", + "value": "true" + }, + { + "name": "umi-enable-duplex-merging", + "value": "true" + }, + { + "name": "umi-enable-probability-model-merging", + "value": "false" + }, + { + "name": "umi-enable-shift-corrections", + "value": "true" + }, + { + "name": "umi-enable-trimming", + "value": "true" + }, + { + "name": "umi-end-mask-length", + "value": "3" + }, + { + "name": "umi-generate-bam-tags", + "value": "true" + }, + { + "name": "umi-masked-base-qual", + "value": "1" + }, + { + "name": "umi-max-base-quality", + "value": "63" + }, + { + "name": "umi-mem-throttle-gb", + "value": "30" + }, + { + "name": "umi-min-map-quality", + "value": "0" + }, + { + "name": "umi-min-reads-per-region", + "value": "4096" + }, + { + "name": "umi-min-supporting-reads", + "value": "2" + }, + { + "name": "umi-padding", + "value": "A" + }, + { + "name": "umi-preserve-input-tags", + "value": "false" + }, + { + "name": "umi-probability-merging-duplex-merging-thres", + "value": "1" + }, + { + "name": "umi-probability-merging-max-transition-ratio", + "value": "10000" + }, + { + "name": "umi-probability-merging-min-isize-freq", + "value": "0.001" + }, + { + "name": "umi-probability-merging-seq-error", + "value": "0.001" + }, + { + "name": "umi-probability-merging-simplex-fuzzy-merging-thres", + "value": "1" + }, + { + "name": "umi-probability-merging-simplex-merging-thres", + "value": "1" + }, + { + "name": "umi-random-merge-factor", + "value": "2" + }, + { + "name": "umi-read-minority-min-ratio", + "value": "0.5" + }, + { + "name": "umi-soft-clip-ratio", + "value": "0.5" + }, + { + "name": "umi-source", + "value": "qname" + }, + { + "name": "umi-start-mask-length", + "value": "1" + }, + { + "name": "umi-stat-estimation-max-fragment-count", + "value": "30" + }, + { + "name": "umi-stat-estimation-max-fragment-size", + "value": "1000" + }, + { + "name": "umi-stat-estimation-max-interval-number", + "value": "50" + }, + { + "name": "umi-stat-estimation-min-probability-unique-fragment", + "value": "0.998" + }, + { + "name": "umi-stat-estimation-umi-jumping-estimation-method", + "value": "simplex" + }, + { + "name": "umi-trim-allowed-mismatches", + "value": "1" + }, + { + "name": "umi-verbose-metrics", + "value": "false" + }, + { + "name": "use-mock-config", + "value": "false" + }, + { + "name": "vc-active-only", + "value": "false" + }, + { + "name": "vc-decoy-contigs", + "value": "NC_007605 hs37d5 chrUn_KN707*v1_decoy chrUn_JTFH0100*v1_decoy KN707*.1 JTFH0100*.1 chrEBV CMV HBV HCV* HIV* KSHV HTLV* MCV SV40 HPV* " + }, + { + "name": "vc-emit-ref-confidence", + "value": "NONE" + }, + { + "name": "vc-emit-zero-coverage-intervals", + "value": "true" + }, + { + "name": "vc-enable-basecall-filter", + "value": "false" + }, + { + "name": "vc-enable-deterministic-run", + "value": "true" + }, + { + "name": "vc-enable-hw-hmm", + "value": "true" + }, + { + "name": "vc-enable-hw-hmm-dump-receiver-data", + "value": "false" + }, + { + "name": "vc-enable-hw-hmm-dump-sender-data", + "value": "false" + }, + { + "name": "vc-enable-hw-hmm-dump-worker-data", + "value": "false" + }, + { + "name": "vc-enable-hw-smw", + "value": "true" + }, + { + "name": "vc-enable-hw-smw-dump-receiver-data", + "value": "false" + }, + { + "name": "vc-enable-hw-smw-dump-sender-data", + "value": "false" + }, + { + "name": "vc-enable-hw-smw-dump-worker-data", + "value": "false" + }, + { + "name": "vc-hw-hmm-timeout", + "value": "100000" + }, + { + "name": "vc-limit-genomecov-output", + "value": "false" + }, + { + "name": "vc-max-alternate-alleles", + "value": "6" + }, + { + "name": "vc-max-haps-per-job", + "value": "2" + }, + { + "name": "vc-sw-cell-width", + "value": "16" + }, + { + "name": "vc-sw-instruction-set", + "value": "sse2" + }, + { + "name": "vc-sw-mode", + "value": "0" + }, + { + "name": "vqsr-lod-cutoff", + "value": "-5.0" + }, + { + "name": "vqsr-num-gaussians", + "value": "8,2,4,2" + }, + { + "name": "watchdog-active-timeout", + "value": "600" + }, + { + "name": "watchdog-dump-processes", + "value": "true;" + }, + { + "name": "watchdog-exit-on-hang", + "value": "true" + }, + { + "name": "watchdog-freemem-threshold", + "value": "2GB" + }, + { + "name": "watchdog-idle-timeout", + "value": "600" + }, + { + "name": "watchdog-max-threads", + "value": "8388608" + }, + { + "name": "watchdog-poll-interval", + "value": "1" + }, + { + "name": "watchdog-resources-monitored", + "value": "THREADS IO MEMORY " + }, + { + "name": "watchdog-verbose-logging", + "value": "true" } + ], + "system": { + "dragen_version": "07.021.572.3.7.0-205-gbcf57830", + "nodename": "ussd-tst-drgn33.illumina.com", + "kernel_release": "3.10.0-1062.12.1.el7.x86_64" + } } diff --git a/data/tiny/tiny-2x1Xrepeats.v8/dragen-replay.json b/data/tiny/tiny-2x1Xrepeats.v8/dragen-replay.json index 4306611..07035db 100644 --- a/data/tiny/tiny-2x1Xrepeats.v8/dragen-replay.json +++ b/data/tiny/tiny-2x1Xrepeats.v8/dragen-replay.json @@ -1,1762 +1,1762 @@ { - "command_line": "\/staging\/rpetrovski\/dragen\/objtree_el7-master\/bin\/dragen --build-hash-table true --ht-reference tiny.fasta --output-directory tiny.v8", - "dragen_config": [ - { - "name": "Aligner.align-direction", - "value": "4" - }, - { - "name": "Aligner.aln-en-mask", - "value": "15" - }, - { - "name": "Aligner.aln-enable", - "value": "1" - }, - { - "name": "Aligner.backtrace-delay", - "value": "8" - }, - { - "name": "Aligner.dedup-min-qual", - "value": "15" - }, - { - "name": "Aligner.disable-lfsr", - "value": "0" - }, - { - "name": "Aligner.en-alt-hap-aln", - "value": "1" - }, - { - "name": "Aligner.en-chimeric-aln", - "value": "1" - }, - { - "name": "Aligner.exon-jump-en", - "value": "1" - }, - { - "name": "Aligner.filt-clip1-pair", - "value": "10" - }, - { - "name": "Aligner.filt-clip1-unpr", - "value": "-5" - }, - { - "name": "Aligner.filt-clip2-pair", - "value": "5" - }, - { - "name": "Aligner.filt-clip2-unpr", - "value": "-10" - }, - { - "name": "Aligner.filt-min-qual", - "value": "0" - }, - { - "name": "Aligner.fix-overlap-mapq", - "value": "0" - }, - { - "name": "Aligner.gap-ext-pen", - "value": "1" - }, - { - "name": "Aligner.gap-open-pen", - "value": "6" - }, - { - "name": "Aligner.global", - "value": "0" - }, - { - "name": "Aligner.hard-clips", - "value": "6" - }, - { - "name": "Aligner.hw-mapq-max", - "value": "250" - }, - { - "name": "Aligner.leftalign-mode", - "value": "2" - }, - { - "name": "Aligner.mapq-coeff", - "value": "152" - }, - { - "name": "Aligner.mapq-floor-1snp", - "value": "0" - }, - { - "name": "Aligner.mapq-max", - "value": "60" - }, - { - "name": "Aligner.mapq-min-len", - "value": "50" - }, - { - "name": "Aligner.mapq-strict-sjs", - "value": "0" - }, - { - "name": "Aligner.match-score", - "value": "1" - }, - { - "name": "Aligner.max-rescues", - "value": "1023" - }, - { - "name": "Aligner.max-stitch-olap", - "value": "48" - }, - { - "name": "Aligner.min-overhang", - "value": "6" - }, - { - "name": "Aligner.min-overhang-ann", - "value": "6" - }, - { - "name": "Aligner.min-score-coeff", - "value": "0.0" - }, - { - "name": "Aligner.mismatch-pen", - "value": "4" - }, - { - "name": "Aligner.no-align-score", - "value": "-8388608" - }, - { - "name": "Aligner.no-ambig-strand", - "value": "1" - }, - { - "name": "Aligner.no-noncan-motifs", - "value": "1" - }, - { - "name": "Aligner.no-unclip-score", - "value": "1" - }, - { - "name": "Aligner.no-unpaired", - "value": "0" - }, - { - "name": "Aligner.paired-mate-info", - "value": "1" - }, - { - "name": "Aligner.pe-max-penalty", - "value": "255" - }, - { - "name": "Aligner.pe-orientation", - "value": "0" - }, - { - "name": "Aligner.pe-sample-max-insert", - "value": "65535" - }, - { - "name": "Aligner.resc-ifpair-len", - "value": "48" - }, - { - "name": "Aligner.resc-nopair-len", - "value": "0" - }, - { - "name": "Aligner.rescue-ceil-factor", - "value": "3" - }, - { - "name": "Aligner.rescue-hifreq", - "value": "0" - }, - { - "name": "Aligner.rescue-kmer-len", - "value": "32" - }, - { - "name": "Aligner.rescue-max-snps", - "value": "7" - }, - { - "name": "Aligner.rna-max-insert", - "value": "4000000" - }, - { - "name": "Aligner.rna-pair-pen-rat", - "value": "5.71875" - }, - { - "name": "Aligner.sample-mapq0", - "value": "1" - }, - { - "name": "Aligner.sec-aligns", - "value": "0" - }, - { - "name": "Aligner.sec-aligns-hard", - "value": "0" - }, - { - "name": "Aligner.sec-phred-delta", - "value": "0" - }, - { - "name": "Aligner.sec-score-delta", - "value": "0" - }, - { - "name": "Aligner.supp-aligns", - "value": "3" - }, - { - "name": "Aligner.supp-as-sec", - "value": "0" - }, - { - "name": "Aligner.sw-all", - "value": "0" - }, - { - "name": "Aligner.sw-burst-diffs", - "value": "1" - }, - { - "name": "Aligner.sw-early-max", - "value": "256" - }, - { - "name": "Aligner.sw-extra-intvl", - "value": "1" - }, - { - "name": "Aligner.unclip-score", - "value": "5" - }, - { - "name": "Aligner.unpaired-pen", - "value": "80" - }, - { - "name": "Aligner.xs-pair-penalty", - "value": "25" - }, - { - "name": "Mapper.adapter-times", - "value": "0" - }, - { - "name": "Mapper.ann-sj-max-indel", - "value": "10" - }, - { - "name": "Mapper.chain-diam-lim", - "value": "8" - }, - { - "name": "Mapper.chain-rad-lim", - "value": "5" - }, - { - "name": "Mapper.cut-bases", - "value": "0" - }, - { - "name": "Mapper.dark-base", - "value": "2" - }, - { - "name": "Mapper.edit-chain-limit", - "value": "29" - }, - { - "name": "Mapper.edit-mode", - "value": "0" - }, - { - "name": "Mapper.edit-read-len", - "value": "100" - }, - { - "name": "Mapper.edit-seed-num", - "value": "6" - }, - { - "name": "Mapper.filt-good-qual", - "value": "0" - }, - { - "name": "Mapper.filter-len-ratio", - "value": "4" - }, - { - "name": "Mapper.intvl-max-hits", - "value": "16" - }, - { - "name": "Mapper.intvl-min-chains", - "value": "8" - }, - { - "name": "Mapper.intvl-sample-hits", - "value": "16" - }, - { - "name": "Mapper.intvl-seed-length", - "value": "60" - }, - { - "name": "Mapper.intvl-seed-longer", - "value": "8" - }, - { - "name": "Mapper.intvl-target-hits", - "value": "32" - }, - { - "name": "Mapper.map-orientations", - "value": "0" - }, - { - "name": "Mapper.max-dram-reqs", - "value": "0" - }, - { - "name": "Mapper.max-hifreq-hits", - "value": "16" - }, - { - "name": "Mapper.max-intron-bases", - "value": "200000" - }, - { - "name": "Mapper.max-lowq-bases", - "value": "4294967295" - }, - { - "name": "Mapper.max-lowq-ratio", - "value": "4294967295" - }, - { - "name": "Mapper.max-read-len", - "value": "4294967295" - }, - { - "name": "Mapper.max-seed-chains", - "value": "511" - }, - { - "name": "Mapper.max-splice-gap", - "value": "150" - }, - { - "name": "Mapper.max-splice-olap", - "value": "16" - }, - { - "name": "Mapper.min-intron-bases", - "value": "20" - }, - { - "name": "Mapper.min-trim-bases", - "value": "0" - }, - { - "name": "Mapper.n-base-qual", - "value": "2" - }, - { - "name": "Mapper.polyg-cutoff", - "value": "0" - }, - { - "name": "Mapper.qual-cutoff", - "value": "0" - }, - { - "name": "Mapper.read-edge-seeds", - "value": "0" - }, - { - "name": "Mapper.rna-filt-ratio", - "value": "25" - }, - { - "name": "Mapper.rna-max-covg-gap", - "value": "150" - }, - { - "name": "Mapper.rna-max-recurs", - "value": "65536" - }, - { - "name": "Mapper.rna-span-log-min", - "value": "13" - }, - { - "name": "Mapper.seed-density", - "value": "0.5" - }, - { - "name": "Mapper.seed-max-age", - "value": "31" - }, - { - "name": "Mapper.seed-old-age", - "value": "9" - }, - { - "name": "Mapper.splice-olap-adj", - "value": "4" - }, - { - "name": "Mapper.trace-mode", - "value": "0" - }, - { - "name": "Mapper.trace-offset", - "value": "0" - }, - { - "name": "Mapper.trace-read-id", - "value": "0" - }, - { - "name": "append-read-index-to-name", - "value": "false" - }, - { - "name": "assert-valid-cigar", - "value": "false" - }, - { - "name": "autodetect-reference-validate", - "value": "false" - }, - { - "name": "bam2dbam-threads", - "value": "12" - }, - { - "name": "bin-split-target-size", - "value": "104857600" - }, - { - "name": "bin-split-threshold", - "value": "7158278826" - }, - { - "name": "bin_memory", - "value": "21474836480" - }, - { - "name": "binner-use-odirect", - "value": "true" - }, - { - "name": "bqsr-context-low-quality-tail", - "value": "2" - }, - { - "name": "bqsr-cycle-indel-context", - "value": "3" - }, - { - "name": "bqsr-cycle-mismatch-context", - "value": "2" - }, - { - "name": "bqsr-emit-indel-tags", - "value": "true" - }, - { - "name": "bqsr-enable-recal-indels", - "value": "true" - }, - { - "name": "bqsr-match-gatk", - "value": "true" - }, - { - "name": "bqsr-max-cycle-value", - "value": "500" - }, - { - "name": "build-hash-table", - "value": "true" - }, - { - "name": "c2s_aligner_packet_size", - "value": "524288" - }, - { - "name": "c2s_aligner_pool_size", - "value": "64" - }, - { - "name": "c2s_decomp_packet_size", - "value": "1048576" - }, - { - "name": "c2s_decomp_pool_size", - "value": "64" - }, - { - "name": "c2s_graph_packet_size", - "value": "65536" - }, - { - "name": "c2s_graph_pool_size", - "value": "256" - }, - { - "name": "c2s_hmm_packet_size", - "value": "4096" - }, - { - "name": "c2s_hmm_pool_size", - "value": "128" - }, - { - "name": "c2s_smw_packet_size", - "value": "4096" - }, - { - "name": "c2s_smw_pool_size", - "value": "128" - }, - { - "name": "cgvcf-num-file-scan-threads", - "value": "8" - }, - { - "name": "cgvcf-save-tmp-files", - "value": "false" - }, - { - "name": "cgvcf-split-chromosomes", - "value": "false" - }, - { - "name": "combine-samples-by-name", - "value": "false" - }, - { - "name": "credentials-1", - "value": "" - }, - { - "name": "credentials-2", - "value": "" - }, - { - "name": "credentials-3", - "value": "" - }, - { - "name": "dbam2bam_threads", - "value": "32" - }, - { - "name": "debug", - "value": "0" - }, - { - "name": "disable_reg_validation", - "value": "0" - }, - { - "name": "distinct-dbam-input-format", - "value": "true" - }, - { - "name": "dump-hang-diag-first", - "value": "true" - }, - { - "name": "dump-map-align-registers", - "value": "0" - }, - { - "name": "dump_config", - "value": "0" - }, - { - "name": "dump_registers", - "value": "0" - }, - { - "name": "dupmark-version", - "value": "sort" - }, - { - "name": "echo_aligner_log", - "value": "0" - }, - { - "name": "echo_general_log", - "value": "0" - }, - { - "name": "echo_mapper_log", - "value": "0" - }, - { - "name": "enable-auto-multifile", - "value": "true" - }, - { - "name": "enable-bqsr", - "value": "false" - }, - { - "name": "enable-deterministic-sort", - "value": "true" - }, - { - "name": "enable-duplicate-marking", - "value": "false" - }, - { - "name": "enable-hang-diag", - "value": "true" - }, - { - "name": "enable-http-server", - "value": "false" - }, - { - "name": "enable-map-align", - "value": "true" - }, - { - "name": "enable-methylation-calling", - "value": "true" - }, - { - "name": "enable-pstack", - "value": "true" - }, - { - "name": "enable-public-bitstream", - "value": "false" - }, - { - "name": "enable-rna-quantification", - "value": "false" - }, - { - "name": "enable-sampling", - "value": "true" - }, - { - "name": "enable-single-cell-rna", - "value": "false" - }, - { - "name": "enable-sort", - "value": "true" - }, - { - "name": "enable-spin", - "value": "false" - }, - { - "name": "enable-umi-stat-estimator", - "value": "false" - }, - { - "name": "enable-variant-caller", - "value": "false" - }, - { - "name": "enable-vcf-indexing", - "value": "true" - }, - { - "name": "enable-watchdog", - "value": "true" - }, - { - "name": "enable-write-input-dbam", - "value": "false" - }, - { - "name": "evict_all_intermediate_results", - "value": "false" - }, - { - "name": "fastq-n-quality", - "value": "2" - }, - { - "name": "fastq-offset", - "value": "33" - }, - { - "name": "fastq2dbam_ratio", - "value": "2" - }, - { - "name": "fastq_block_size", - "value": "1048576" - }, - { - "name": "fastq_pool_size", - "value": "4" - }, - { - "name": "fastqc-adapter-file", - "value": "adapter_sequences.fasta" - }, - { - "name": "fastqc-granularity", - "value": "7" - }, - { - "name": "fastqc-only", - "value": "false" - }, - { - "name": "filter-flags-from-output", - "value": "0" - }, - { - "name": "gc-metrics-cover-percent", - "value": "75" - }, - { - "name": "gc-metrics-enable", - "value": "false" - }, - { - "name": "gc-metrics-num-bins", - "value": "5" - }, - { - "name": "gc-metrics-only-covered", - "value": "false" - }, - { - "name": "gc-metrics-window-size", - "value": "100" - }, - { - "name": "generate-en-tags", - "value": "false" - }, - { - "name": "generate-md-tags", - "value": "false" - }, - { - "name": "generate-xq-tags", - "value": "true" - }, - { - "name": "generate-zs-tags", - "value": "false" - }, - { - "name": "ht-anchor-bin-bits", - "value": "0" - }, - { - "name": "ht-cost-coeff-seed-freq", - "value": "0.5" - }, - { - "name": "ht-cost-coeff-seed-len", - "value": "1" - }, - { - "name": "ht-cost-penalty", - "value": "0" - }, - { - "name": "ht-cost-penalty-incr", - "value": "0.69999999999999996" - }, - { - "name": "ht-crc-extended", - "value": "0" - }, - { - "name": "ht-crc-primary", - "value": "0" - }, - { - "name": "ht-dump-int-params", - "value": "0" - }, - { - "name": "ht-ext-rec-cost", - "value": "4" - }, - { - "name": "ht-max-dec-factor", - "value": "1" - }, - { - "name": "ht-max-ext-incr", - "value": "12" - }, - { - "name": "ht-max-ext-seed-len", - "value": "0" - }, - { - "name": "ht-max-seed-freq", - "value": "16" - }, - { - "name": "ht-max-seed-freq-len", - "value": "98" - }, - { - "name": "ht-max-table-chunks", - "value": "0" - }, - { - "name": "ht-mem-limit", - "value": "0GB" - }, - { - "name": "ht-methylated", - "value": "false" - }, - { - "name": "ht-min-repair-prob", - "value": "0.20000000000000001" - }, - { - "name": "ht-override-size-check", - "value": "0" - }, - { - "name": "ht-pri-max-seed-freq", - "value": "0" - }, - { - "name": "ht-rand-hit-extend", - "value": "8" - }, - { - "name": "ht-rand-hit-hifreq", - "value": "1" - }, - { - "name": "ht-ref-seed-interval", - "value": "1" - }, - { - "name": "ht-reference", - "value": "tiny.fasta" - }, - { - "name": "ht-repair-strategy", - "value": "0" - }, - { - "name": "ht-seed-len", - "value": "21" - }, - { - "name": "ht-size", - "value": "0GB" - }, - { - "name": "ht-soft-seed-freq-cap", - "value": "12" - }, - { - "name": "ht-target-seed-freq", - "value": "4" - }, - { - "name": "ht-test-only", - "value": "0" - }, - { - "name": "ht-write-hash-bin", - "value": "0" - }, - { - "name": "http-server-port", - "value": "7993" - }, - { - "name": "input-qname-suffix-delimiter", - "value": "\/" - }, - { - "name": "linkedreads-correction-table1", - "value": "linkedreads_corrections_1.txt" - }, - { - "name": "linkedreads-correction-table2", - "value": "linkedreads_corrections_2.txt" - }, - { - "name": "linkedreads-correction-table3", - "value": "linkedreads_corrections_3.txt" - }, - { - "name": "linkedreads-enable", - "value": "false" - }, - { - "name": "logfile_prefix", - "value": "\/opt\/edico\/logs\/" - }, - { - "name": "mapper_cigar", - "value": "0" - }, - { - "name": "max_bin_size", - "value": "943718400" - }, - { - "name": "max_ios_inflight", - "value": "1024" - }, - { - "name": "methylation-TAPS", - "value": "false" - }, - { - "name": "methylation-generate-cytosine-report", - "value": "false" - }, - { - "name": "methylation-generate-mbias-report", - "value": "false" - }, - { - "name": "methylation-match-bismark", - "value": "false" - }, - { - "name": "methylation-protocol", - "value": "none" - }, - { - "name": "methylation-reports-only", - "value": "false" - }, - { - "name": "min-predicted-output-gb", - "value": "200" - }, - { - "name": "multiplier", - "value": "1" - }, - { - "name": "no-reset", - "value": "false" - }, - { - "name": "output-directory", - "value": "tiny.v8" - }, - { - "name": "output-format", - "value": "bam" - }, - { - "name": "pair-by-name", - "value": "true" - }, - { - "name": "pair-suffix-delimiter", - "value": "\/" - }, - { - "name": "partition-on-compression-bottleneck", - "value": "false" - }, - { - "name": "pe-stats-continuous-update", - "value": "false" - }, - { - "name": "pe-stats-interval-delay", - "value": "5" - }, - { - "name": "pe-stats-interval-memory", - "value": "10" - }, - { - "name": "pe-stats-interval-size", - "value": "25000" - }, - { - "name": "pe-stats-sample-size", - "value": "100000" - }, - { - "name": "pe-stats-update-log-only", - "value": "false" - }, - { - "name": "preserve-bqsr-tags", - "value": "false" - }, - { - "name": "preserve-map-align-order", - "value": "false" - }, - { - "name": "qc-indel-denovo-quality-threshold", - "value": "0.02" - }, - { - "name": "qc-snp-denovo-quality-threshold", - "value": "0.050000000000000003" - }, - { - "name": "read-trimmers", - "value": "none" - }, - { - "name": "recordset-memory", - "value": "1073741824" - }, - { - "name": "reg_errors_are_warnings", - "value": "0" - }, - { - "name": "remove-duplicates", - "value": "false" - }, - { - "name": "repeat-genotype-enable", - "value": "false" - }, - { - "name": "repeat-genotype-min-anchor-mapq", - "value": "60" - }, - { - "name": "repeat-genotype-min-baseq", - "value": "20" - }, - { - "name": "repeat-genotype-min-score", - "value": "0.90000000000000002" - }, - { - "name": "repeat-genotype-read-depth", - "value": "0" - }, - { - "name": "repeat-genotype-read-length", - "value": "0" - }, - { - "name": "repeat-genotype-region-extension-length", - "value": "1000" - }, - { - "name": "repeat-genotype-skip-unaligned", - "value": "true" - }, - { - "name": "repeat-genotype-specs", - "value": "" - }, - { - "name": "rna-ann-sj-min-len", - "value": "6" - }, - { - "name": "rna-cv-min-expression", - "value": "0" - }, - { - "name": "rna-gf-aggressive-filters", - "value": "false" - }, - { - "name": "rna-gf-blast-pairs", - "value": "" - }, - { - "name": "rna-gf-coverage-lookup-window", - "value": "1000" - }, - { - "name": "rna-gf-enriched-genes", - "value": "" - }, - { - "name": "rna-gf-enriched-only", - "value": "false" - }, - { - "name": "rna-gf-exon-snap", - "value": "50" - }, - { - "name": "rna-gf-mate-overhang", - "value": "8" - }, - { - "name": "rna-gf-max-partners", - "value": "3" - }, - { - "name": "rna-gf-merge-calls", - "value": "true" - }, - { - "name": "rna-gf-min-alt-to-ref", - "value": "0.00999999978" - }, - { - "name": "rna-gf-min-anchor", - "value": "12" - }, - { - "name": "rna-gf-min-blast-pairs-eval", - "value": "1e-100" - }, - { - "name": "rna-gf-min-breakpoint-mapq", - "value": "20" - }, - { - "name": "rna-gf-min-cis-distance", - "value": "200000" - }, - { - "name": "rna-gf-min-covered-bases", - "value": "125" - }, - { - "name": "rna-gf-min-covered-bases-uncaptured", - "value": "79" - }, - { - "name": "rna-gf-min-neighbor-dist", - "value": "15" - }, - { - "name": "rna-gf-min-score", - "value": "0.5" - }, - { - "name": "rna-gf-min-score-ratio", - "value": "0.150000006" - }, - { - "name": "rna-gf-min-support", - "value": "2" - }, - { - "name": "rna-gf-min-support-be", - "value": "10" - }, - { - "name": "rna-gf-min-unique-alignments", - "value": "2" - }, - { - "name": "rna-gf-num-threads", - "value": "4" - }, - { - "name": "rna-gf-ref-anchor", - "value": "8" - }, - { - "name": "rna-gf-restrict-genes", - "value": "true" - }, - { - "name": "rna-gf-score-model", - "value": "" - }, - { - "name": "rna-mapq-unique", - "value": "0" - }, - { - "name": "rna-quantification-fld-max", - "value": "1000" - }, - { - "name": "rna-quantification-fld-mean", - "value": "250" - }, - { - "name": "rna-quantification-fld-sd", - "value": "25" - }, - { - "name": "rna-quantification-full-concordance", - "value": "false" - }, - { - "name": "rna-quantification-gc-bias", - "value": "true" - }, - { - "name": "rna-quantification-inference-max", - "value": "10000" - }, - { - "name": "rna-quantification-inference-min", - "value": "100" - }, - { - "name": "rna-quantification-init-uniform", - "value": "0" - }, - { - "name": "rna-quantification-library-type", - "value": "A" - }, - { - "name": "rna-quantification-tlen-min", - "value": "500" - }, - { - "name": "rna-quantification-use-em", - "value": "0" - }, - { - "name": "rna_aligner_buffer_size", - "value": "67108864" - }, - { - "name": "rna_aligner_buffers", - "value": "0" - }, - { - "name": "s2c_dbam_block_size", - "value": "65536" - }, - { - "name": "s2c_dbam_pool_size", - "value": "32" - }, - { - "name": "s2c_decomp_block_size", - "value": "262144" - }, - { - "name": "s2c_decomp_pool_size", - "value": "64" - }, - { - "name": "s2c_graph_block_size", - "value": "16384" - }, - { - "name": "s2c_graph_pool_size", - "value": "256" - }, - { - "name": "s2c_hmm_block_size", - "value": "16384" - }, - { - "name": "s2c_hmm_pool_size", - "value": "16384" - }, - { - "name": "s2c_phase1_packet_size", - "value": "16384" - }, - { - "name": "s2c_phase2_packet_size", - "value": "16384" - }, - { - "name": "s2c_smw_block_size", - "value": "16384" - }, - { - "name": "s2c_smw_pool_size", - "value": "2048" - }, - { - "name": "single-cell-barcode", - "value": "16" - }, - { - "name": "single-cell-count-introns", - "value": "false" - }, - { - "name": "single-cell-global-umi", - "value": "false" - }, - { - "name": "single-cell-type", - "value": "simple" - }, - { - "name": "single-cell-umi", - "value": "8" - }, - { - "name": "soft-read-trimmers", - "value": "polyg" - }, - { - "name": "sort_buffer_size", - "value": "1048576" - }, - { - "name": "stop-at-read", - "value": "0" - }, - { - "name": "strip-input-qname-suffixes", - "value": "true" - }, - { - "name": "sv-denovo-threshold", - "value": "20" - }, - { - "name": "sv-enable-rrm-for-insertions-in-cancer-calling-modes", - "value": "true" - }, - { - "name": "sv-enable-rrm-for-insertions-in-germline-calling-modes", - "value": "true" - }, - { - "name": "sv-generate-evidence-bam", - "value": "false" - }, - { - "name": "sv-graph-node-max-edge-count", - "value": "10" - }, - { - "name": "sv-hyper-sensitivity", - "value": "false" - }, - { - "name": "sv-min-candidate-spanning-count", - "value": "3" - }, - { - "name": "sv-min-candidate-variant-size", - "value": "8" - }, - { - "name": "sv-min-diploid-variant-score", - "value": "10" - }, - { - "name": "sv-min-edge-observations", - "value": "3" - }, - { - "name": "sv-min-pass-diploid-gt-score", - "value": "15" - }, - { - "name": "sv-min-pass-diploid-variant-score", - "value": "20" - }, - { - "name": "sv-min-pass-somatic-score", - "value": "30" - }, - { - "name": "sv-min-scored-variant-size", - "value": "50" - }, - { - "name": "sv-min-somatic-score", - "value": "10" - }, - { - "name": "sv-mobile-element-sequences", - "value": "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACGAGGTCAGGAGATCGAGACCATCCCGGCTAAAACGGTGAAACCCCGTCTCTACTAAAAATACAAAAAATTAGCCGGGCGTAGTGGCGGGCGCCTGTAGTCCCAGCTACTTGGGAGGCTGAGGCAGGAGAATGGCGTGAACCCGGGAGGCGGAGCTTGCAGTGAGCCGAGATCCCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTC GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGTGGATCATGAGGTCAGGAGATCGAGACCATCCTGGCTAACAAGGTGAAACCCCGTCTCTACTAAAAATACAAAAAATTAGCCGGGCGCGGTGGCGGGCGCCTGTAGTCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATGGCGTGAACCCGGGAAGCGGAGCTTGCAGTGAGCCGAGATTGCGCCACTGCAGTCCGCAGTCCGGCCTGGGCGACAGAGCGAGACTCCGTCTC " - }, - { - "name": "sv-rna-min-candidate-variant-size", - "value": "1000" - }, - { - "name": "sv-use-overlap-pair-evidence", - "value": "false" - }, - { - "name": "trim-disable-mapping", - "value": "false" - }, - { - "name": "trim-filter-dummy-len", - "value": "10" - }, - { - "name": "trim-filter-set-flag", - "value": "true" - }, - { - "name": "trim-min-len-read1", - "value": "20" - }, - { - "name": "trim-min-len-read2", - "value": "20" - }, - { - "name": "trim-polyg-early-exit-threshold", - "value": "-500" - }, - { - "name": "trim-polyg-g-score-r1-3prime", - "value": "15" - }, - { - "name": "trim-polyg-g-score-r1-5prime", - "value": "0" - }, - { - "name": "trim-polyg-g-score-r2-3prime", - "value": "15" - }, - { - "name": "trim-polyg-g-score-r2-5prime", - "value": "0" - }, - { - "name": "trim-polyg-kmer-len", - "value": "25" - }, - { - "name": "trim-polyg-kmer-non-g", - "value": "2" - }, - { - "name": "trim-polyg-min-trim-r1-3prime", - "value": "6" - }, - { - "name": "trim-polyg-min-trim-r1-5prime", - "value": "6" - }, - { - "name": "trim-polyg-min-trim-r2-3prime", - "value": "6" - }, - { - "name": "trim-polyg-min-trim-r2-5prime", - "value": "6" - }, - { - "name": "umi-base-representation-min-ratio", - "value": "0.5" - }, - { - "name": "umi-correction-scheme", - "value": "lookup" - }, - { - "name": "umi-correction-table", - "value": "umi_correction_table.txt" - }, - { - "name": "umi-enable", - "value": "false" - }, - { - "name": "umi-enable-contextual-corrections", - "value": "true" - }, - { - "name": "umi-enable-duplex-merging", - "value": "true" - }, - { - "name": "umi-enable-probability-model-merging", - "value": "false" - }, - { - "name": "umi-enable-shift-corrections", - "value": "true" - }, - { - "name": "umi-enable-trimming", - "value": "true" - }, - { - "name": "umi-end-mask-length", - "value": "3" - }, - { - "name": "umi-generate-bam-tags", - "value": "true" - }, - { - "name": "umi-masked-base-qual", - "value": "1" - }, - { - "name": "umi-max-base-quality", - "value": "63" - }, - { - "name": "umi-mem-throttle-gb", - "value": "30" - }, - { - "name": "umi-min-map-quality", - "value": "0" - }, - { - "name": "umi-min-reads-per-region", - "value": "4096" - }, - { - "name": "umi-min-supporting-reads", - "value": "2" - }, - { - "name": "umi-padding", - "value": "A" - }, - { - "name": "umi-preserve-input-tags", - "value": "false" - }, - { - "name": "umi-probability-merging-duplex-merging-thres", - "value": "1" - }, - { - "name": "umi-probability-merging-max-transition-ratio", - "value": "10000" - }, - { - "name": "umi-probability-merging-min-isize-freq", - "value": "0.001" - }, - { - "name": "umi-probability-merging-seq-error", - "value": "0.001" - }, - { - "name": "umi-probability-merging-simplex-fuzzy-merging-thres", - "value": "1" - }, - { - "name": "umi-probability-merging-simplex-merging-thres", - "value": "1" - }, - { - "name": "umi-random-merge-factor", - "value": "2" - }, - { - "name": "umi-read-minority-min-ratio", - "value": "0.5" - }, - { - "name": "umi-soft-clip-ratio", - "value": "0.5" - }, - { - "name": "umi-source", - "value": "qname" - }, - { - "name": "umi-start-mask-length", - "value": "1" - }, - { - "name": "umi-stat-estimation-max-fragment-count", - "value": "30" - }, - { - "name": "umi-stat-estimation-max-fragment-size", - "value": "1000" - }, - { - "name": "umi-stat-estimation-max-interval-number", - "value": "50" - }, - { - "name": "umi-stat-estimation-min-probability-unique-fragment", - "value": "0.998" - }, - { - "name": "umi-stat-estimation-umi-jumping-estimation-method", - "value": "simplex" - }, - { - "name": "umi-trim-allowed-mismatches", - "value": "1" - }, - { - "name": "umi-verbose-metrics", - "value": "false" - }, - { - "name": "use-mock-config", - "value": "false" - }, - { - "name": "vc-active-only", - "value": "false" - }, - { - "name": "vc-decoy-contigs", - "value": "NC_007605 hs37d5 chrUn_KN707*v1_decoy chrUn_JTFH0100*v1_decoy KN707*.1 JTFH0100*.1 chrEBV CMV HBV HCV* HIV* KSHV HTLV* MCV SV40 HPV* " - }, - { - "name": "vc-emit-ref-confidence", - "value": "NONE" - }, - { - "name": "vc-emit-zero-coverage-intervals", - "value": "true" - }, - { - "name": "vc-enable-basecall-filter", - "value": "false" - }, - { - "name": "vc-enable-deterministic-run", - "value": "true" - }, - { - "name": "vc-enable-hw-hmm", - "value": "true" - }, - { - "name": "vc-enable-hw-hmm-dump-receiver-data", - "value": "false" - }, - { - "name": "vc-enable-hw-hmm-dump-sender-data", - "value": "false" - }, - { - "name": "vc-enable-hw-hmm-dump-worker-data", - "value": "false" - }, - { - "name": "vc-enable-hw-smw", - "value": "true" - }, - { - "name": "vc-enable-hw-smw-dump-receiver-data", - "value": "false" - }, - { - "name": "vc-enable-hw-smw-dump-sender-data", - "value": "false" - }, - { - "name": "vc-enable-hw-smw-dump-worker-data", - "value": "false" - }, - { - "name": "vc-hw-hmm-timeout", - "value": "100000" - }, - { - "name": "vc-limit-genomecov-output", - "value": "false" - }, - { - "name": "vc-max-alternate-alleles", - "value": "6" - }, - { - "name": "vc-max-haps-per-job", - "value": "2" - }, - { - "name": "vc-sw-cell-width", - "value": "16" - }, - { - "name": "vc-sw-instruction-set", - "value": "sse2" - }, - { - "name": "vc-sw-mode", - "value": "0" - }, - { - "name": "vqsr-lod-cutoff", - "value": "-5.0" - }, - { - "name": "vqsr-num-gaussians", - "value": "8,2,4,2" - }, - { - "name": "watchdog-active-timeout", - "value": "600" - }, - { - "name": "watchdog-dump-processes", - "value": "true;" - }, - { - "name": "watchdog-exit-on-hang", - "value": "true" - }, - { - "name": "watchdog-freemem-threshold", - "value": "2GB" - }, - { - "name": "watchdog-idle-timeout", - "value": "600" - }, - { - "name": "watchdog-max-threads", - "value": "8388608" - }, - { - "name": "watchdog-poll-interval", - "value": "1" - }, - { - "name": "watchdog-resources-monitored", - "value": "THREADS IO MEMORY " - }, - { - "name": "watchdog-verbose-logging", - "value": "true" - } - ], - "system": { - "dragen_version": "07.021.572.3.7.0-205-gbcf57830", - "nodename": "ussd-tst-drgn33.illumina.com", - "kernel_release": "3.10.0-1062.12.1.el7.x86_64" + "command_line": "/staging/rpetrovski/dragen/objtree_el7-master/bin/dragen --build-hash-table true --ht-reference tiny.fasta --output-directory tiny.v8", + "dragen_config": [ + { + "name": "Aligner.align-direction", + "value": "4" + }, + { + "name": "Aligner.aln-en-mask", + "value": "15" + }, + { + "name": "Aligner.aln-enable", + "value": "1" + }, + { + "name": "Aligner.backtrace-delay", + "value": "8" + }, + { + "name": "Aligner.dedup-min-qual", + "value": "15" + }, + { + "name": "Aligner.disable-lfsr", + "value": "0" + }, + { + "name": "Aligner.en-alt-hap-aln", + "value": "1" + }, + { + "name": "Aligner.en-chimeric-aln", + "value": "1" + }, + { + "name": "Aligner.exon-jump-en", + "value": "1" + }, + { + "name": "Aligner.filt-clip1-pair", + "value": "10" + }, + { + "name": "Aligner.filt-clip1-unpr", + "value": "-5" + }, + { + "name": "Aligner.filt-clip2-pair", + "value": "5" + }, + { + "name": "Aligner.filt-clip2-unpr", + "value": "-10" + }, + { + "name": "Aligner.filt-min-qual", + "value": "0" + }, + { + "name": "Aligner.fix-overlap-mapq", + "value": "0" + }, + { + "name": "Aligner.gap-ext-pen", + "value": "1" + }, + { + "name": "Aligner.gap-open-pen", + "value": "6" + }, + { + "name": "Aligner.global", + "value": "0" + }, + { + "name": "Aligner.hard-clips", + "value": "6" + }, + { + "name": "Aligner.hw-mapq-max", + "value": "250" + }, + { + "name": "Aligner.leftalign-mode", + "value": "2" + }, + { + "name": "Aligner.mapq-coeff", + "value": "152" + }, + { + "name": "Aligner.mapq-floor-1snp", + "value": "0" + }, + { + "name": "Aligner.mapq-max", + "value": "60" + }, + { + "name": "Aligner.mapq-min-len", + "value": "50" + }, + { + "name": "Aligner.mapq-strict-sjs", + "value": "0" + }, + { + "name": "Aligner.match-score", + "value": "1" + }, + { + "name": "Aligner.max-rescues", + "value": "1023" + }, + { + "name": "Aligner.max-stitch-olap", + "value": "48" + }, + { + "name": "Aligner.min-overhang", + "value": "6" + }, + { + "name": "Aligner.min-overhang-ann", + "value": "6" + }, + { + "name": "Aligner.min-score-coeff", + "value": "0.0" + }, + { + "name": "Aligner.mismatch-pen", + "value": "4" + }, + { + "name": "Aligner.no-align-score", + "value": "-8388608" + }, + { + "name": "Aligner.no-ambig-strand", + "value": "1" + }, + { + "name": "Aligner.no-noncan-motifs", + "value": "1" + }, + { + "name": "Aligner.no-unclip-score", + "value": "1" + }, + { + "name": "Aligner.no-unpaired", + "value": "0" + }, + { + "name": "Aligner.paired-mate-info", + "value": "1" + }, + { + "name": "Aligner.pe-max-penalty", + "value": "255" + }, + { + "name": "Aligner.pe-orientation", + "value": "0" + }, + { + "name": "Aligner.pe-sample-max-insert", + "value": "65535" + }, + { + "name": "Aligner.resc-ifpair-len", + "value": "48" + }, + { + "name": "Aligner.resc-nopair-len", + "value": "0" + }, + { + "name": "Aligner.rescue-ceil-factor", + "value": "3" + }, + { + "name": "Aligner.rescue-hifreq", + "value": "0" + }, + { + "name": "Aligner.rescue-kmer-len", + "value": "32" + }, + { + "name": "Aligner.rescue-max-snps", + "value": "7" + }, + { + "name": "Aligner.rna-max-insert", + "value": "4000000" + }, + { + "name": "Aligner.rna-pair-pen-rat", + "value": "5.71875" + }, + { + "name": "Aligner.sample-mapq0", + "value": "1" + }, + { + "name": "Aligner.sec-aligns", + "value": "0" + }, + { + "name": "Aligner.sec-aligns-hard", + "value": "0" + }, + { + "name": "Aligner.sec-phred-delta", + "value": "0" + }, + { + "name": "Aligner.sec-score-delta", + "value": "0" + }, + { + "name": "Aligner.supp-aligns", + "value": "3" + }, + { + "name": "Aligner.supp-as-sec", + "value": "0" + }, + { + "name": "Aligner.sw-all", + "value": "0" + }, + { + "name": "Aligner.sw-burst-diffs", + "value": "1" + }, + { + "name": "Aligner.sw-early-max", + "value": "256" + }, + { + "name": "Aligner.sw-extra-intvl", + "value": "1" + }, + { + "name": "Aligner.unclip-score", + "value": "5" + }, + { + "name": "Aligner.unpaired-pen", + "value": "80" + }, + { + "name": "Aligner.xs-pair-penalty", + "value": "25" + }, + { + "name": "Mapper.adapter-times", + "value": "0" + }, + { + "name": "Mapper.ann-sj-max-indel", + "value": "10" + }, + { + "name": "Mapper.chain-diam-lim", + "value": "8" + }, + { + "name": "Mapper.chain-rad-lim", + "value": "5" + }, + { + "name": "Mapper.cut-bases", + "value": "0" + }, + { + "name": "Mapper.dark-base", + "value": "2" + }, + { + "name": "Mapper.edit-chain-limit", + "value": "29" + }, + { + "name": "Mapper.edit-mode", + "value": "0" + }, + { + "name": "Mapper.edit-read-len", + "value": "100" + }, + { + "name": "Mapper.edit-seed-num", + "value": "6" + }, + { + "name": "Mapper.filt-good-qual", + "value": "0" + }, + { + "name": "Mapper.filter-len-ratio", + "value": "4" + }, + { + "name": "Mapper.intvl-max-hits", + "value": "16" + }, + { + "name": "Mapper.intvl-min-chains", + "value": "8" + }, + { + "name": "Mapper.intvl-sample-hits", + "value": "16" + }, + { + "name": "Mapper.intvl-seed-length", + "value": "60" + }, + { + "name": "Mapper.intvl-seed-longer", + "value": "8" + }, + { + "name": "Mapper.intvl-target-hits", + "value": "32" + }, + { + "name": "Mapper.map-orientations", + "value": "0" + }, + { + "name": "Mapper.max-dram-reqs", + "value": "0" + }, + { + "name": "Mapper.max-hifreq-hits", + "value": "16" + }, + { + "name": "Mapper.max-intron-bases", + "value": "200000" + }, + { + "name": "Mapper.max-lowq-bases", + "value": "4294967295" + }, + { + "name": "Mapper.max-lowq-ratio", + "value": "4294967295" + }, + { + "name": "Mapper.max-read-len", + "value": "4294967295" + }, + { + "name": "Mapper.max-seed-chains", + "value": "511" + }, + { + "name": "Mapper.max-splice-gap", + "value": "150" + }, + { + "name": "Mapper.max-splice-olap", + "value": "16" + }, + { + "name": "Mapper.min-intron-bases", + "value": "20" + }, + { + "name": "Mapper.min-trim-bases", + "value": "0" + }, + { + "name": "Mapper.n-base-qual", + "value": "2" + }, + { + "name": "Mapper.polyg-cutoff", + "value": "0" + }, + { + "name": "Mapper.qual-cutoff", + "value": "0" + }, + { + "name": "Mapper.read-edge-seeds", + "value": "0" + }, + { + "name": "Mapper.rna-filt-ratio", + "value": "25" + }, + { + "name": "Mapper.rna-max-covg-gap", + "value": "150" + }, + { + "name": "Mapper.rna-max-recurs", + "value": "65536" + }, + { + "name": "Mapper.rna-span-log-min", + "value": "13" + }, + { + "name": "Mapper.seed-density", + "value": "0.5" + }, + { + "name": "Mapper.seed-max-age", + "value": "31" + }, + { + "name": "Mapper.seed-old-age", + "value": "9" + }, + { + "name": "Mapper.splice-olap-adj", + "value": "4" + }, + { + "name": "Mapper.trace-mode", + "value": "0" + }, + { + "name": "Mapper.trace-offset", + "value": "0" + }, + { + "name": "Mapper.trace-read-id", + "value": "0" + }, + { + "name": "append-read-index-to-name", + "value": "false" + }, + { + "name": "assert-valid-cigar", + "value": "false" + }, + { + "name": "autodetect-reference-validate", + "value": "false" + }, + { + "name": "bam2dbam-threads", + "value": "12" + }, + { + "name": "bin-split-target-size", + "value": "104857600" + }, + { + "name": "bin-split-threshold", + "value": "7158278826" + }, + { + "name": "bin_memory", + "value": "21474836480" + }, + { + "name": "binner-use-odirect", + "value": "true" + }, + { + "name": "bqsr-context-low-quality-tail", + "value": "2" + }, + { + "name": "bqsr-cycle-indel-context", + "value": "3" + }, + { + "name": "bqsr-cycle-mismatch-context", + "value": "2" + }, + { + "name": "bqsr-emit-indel-tags", + "value": "true" + }, + { + "name": "bqsr-enable-recal-indels", + "value": "true" + }, + { + "name": "bqsr-match-gatk", + "value": "true" + }, + { + "name": "bqsr-max-cycle-value", + "value": "500" + }, + { + "name": "build-hash-table", + "value": "true" + }, + { + "name": "c2s_aligner_packet_size", + "value": "524288" + }, + { + "name": "c2s_aligner_pool_size", + "value": "64" + }, + { + "name": "c2s_decomp_packet_size", + "value": "1048576" + }, + { + "name": "c2s_decomp_pool_size", + "value": "64" + }, + { + "name": "c2s_graph_packet_size", + "value": "65536" + }, + { + "name": "c2s_graph_pool_size", + "value": "256" + }, + { + "name": "c2s_hmm_packet_size", + "value": "4096" + }, + { + "name": "c2s_hmm_pool_size", + "value": "128" + }, + { + "name": "c2s_smw_packet_size", + "value": "4096" + }, + { + "name": "c2s_smw_pool_size", + "value": "128" + }, + { + "name": "cgvcf-num-file-scan-threads", + "value": "8" + }, + { + "name": "cgvcf-save-tmp-files", + "value": "false" + }, + { + "name": "cgvcf-split-chromosomes", + "value": "false" + }, + { + "name": "combine-samples-by-name", + "value": "false" + }, + { + "name": "credentials-1", + "value": "" + }, + { + "name": "credentials-2", + "value": "" + }, + { + "name": "credentials-3", + "value": "" + }, + { + "name": "dbam2bam_threads", + "value": "32" + }, + { + "name": "debug", + "value": "0" + }, + { + "name": "disable_reg_validation", + "value": "0" + }, + { + "name": "distinct-dbam-input-format", + "value": "true" + }, + { + "name": "dump-hang-diag-first", + "value": "true" + }, + { + "name": "dump-map-align-registers", + "value": "0" + }, + { + "name": "dump_config", + "value": "0" + }, + { + "name": "dump_registers", + "value": "0" + }, + { + "name": "dupmark-version", + "value": "sort" + }, + { + "name": "echo_aligner_log", + "value": "0" + }, + { + "name": "echo_general_log", + "value": "0" + }, + { + "name": "echo_mapper_log", + "value": "0" + }, + { + "name": "enable-auto-multifile", + "value": "true" + }, + { + "name": "enable-bqsr", + "value": "false" + }, + { + "name": "enable-deterministic-sort", + "value": "true" + }, + { + "name": "enable-duplicate-marking", + "value": "false" + }, + { + "name": "enable-hang-diag", + "value": "true" + }, + { + "name": "enable-http-server", + "value": "false" + }, + { + "name": "enable-map-align", + "value": "true" + }, + { + "name": "enable-methylation-calling", + "value": "true" + }, + { + "name": "enable-pstack", + "value": "true" + }, + { + "name": "enable-public-bitstream", + "value": "false" + }, + { + "name": "enable-rna-quantification", + "value": "false" + }, + { + "name": "enable-sampling", + "value": "true" + }, + { + "name": "enable-single-cell-rna", + "value": "false" + }, + { + "name": "enable-sort", + "value": "true" + }, + { + "name": "enable-spin", + "value": "false" + }, + { + "name": "enable-umi-stat-estimator", + "value": "false" + }, + { + "name": "enable-variant-caller", + "value": "false" + }, + { + "name": "enable-vcf-indexing", + "value": "true" + }, + { + "name": "enable-watchdog", + "value": "true" + }, + { + "name": "enable-write-input-dbam", + "value": "false" + }, + { + "name": "evict_all_intermediate_results", + "value": "false" + }, + { + "name": "fastq-n-quality", + "value": "2" + }, + { + "name": "fastq-offset", + "value": "33" + }, + { + "name": "fastq2dbam_ratio", + "value": "2" + }, + { + "name": "fastq_block_size", + "value": "1048576" + }, + { + "name": "fastq_pool_size", + "value": "4" + }, + { + "name": "fastqc-adapter-file", + "value": "adapter_sequences.fasta" + }, + { + "name": "fastqc-granularity", + "value": "7" + }, + { + "name": "fastqc-only", + "value": "false" + }, + { + "name": "filter-flags-from-output", + "value": "0" + }, + { + "name": "gc-metrics-cover-percent", + "value": "75" + }, + { + "name": "gc-metrics-enable", + "value": "false" + }, + { + "name": "gc-metrics-num-bins", + "value": "5" + }, + { + "name": "gc-metrics-only-covered", + "value": "false" + }, + { + "name": "gc-metrics-window-size", + "value": "100" + }, + { + "name": "generate-en-tags", + "value": "false" + }, + { + "name": "generate-md-tags", + "value": "false" + }, + { + "name": "generate-xq-tags", + "value": "true" + }, + { + "name": "generate-zs-tags", + "value": "false" + }, + { + "name": "ht-anchor-bin-bits", + "value": "0" + }, + { + "name": "ht-cost-coeff-seed-freq", + "value": "0.5" + }, + { + "name": "ht-cost-coeff-seed-len", + "value": "1" + }, + { + "name": "ht-cost-penalty", + "value": "0" + }, + { + "name": "ht-cost-penalty-incr", + "value": "0.69999999999999996" + }, + { + "name": "ht-crc-extended", + "value": "0" + }, + { + "name": "ht-crc-primary", + "value": "0" + }, + { + "name": "ht-dump-int-params", + "value": "0" + }, + { + "name": "ht-ext-rec-cost", + "value": "4" + }, + { + "name": "ht-max-dec-factor", + "value": "1" + }, + { + "name": "ht-max-ext-incr", + "value": "12" + }, + { + "name": "ht-max-ext-seed-len", + "value": "0" + }, + { + "name": "ht-max-seed-freq", + "value": "16" + }, + { + "name": "ht-max-seed-freq-len", + "value": "98" + }, + { + "name": "ht-max-table-chunks", + "value": "0" + }, + { + "name": "ht-mem-limit", + "value": "0GB" + }, + { + "name": "ht-methylated", + "value": "false" + }, + { + "name": "ht-min-repair-prob", + "value": "0.20000000000000001" + }, + { + "name": "ht-override-size-check", + "value": "0" + }, + { + "name": "ht-pri-max-seed-freq", + "value": "0" + }, + { + "name": "ht-rand-hit-extend", + "value": "8" + }, + { + "name": "ht-rand-hit-hifreq", + "value": "1" + }, + { + "name": "ht-ref-seed-interval", + "value": "1" + }, + { + "name": "ht-reference", + "value": "tiny.fasta" + }, + { + "name": "ht-repair-strategy", + "value": "0" + }, + { + "name": "ht-seed-len", + "value": "21" + }, + { + "name": "ht-size", + "value": "0GB" + }, + { + "name": "ht-soft-seed-freq-cap", + "value": "12" + }, + { + "name": "ht-target-seed-freq", + "value": "4" + }, + { + "name": "ht-test-only", + "value": "0" + }, + { + "name": "ht-write-hash-bin", + "value": "0" + }, + { + "name": "http-server-port", + "value": "7993" + }, + { + "name": "input-qname-suffix-delimiter", + "value": "/" + }, + { + "name": "linkedreads-correction-table1", + "value": "linkedreads_corrections_1.txt" + }, + { + "name": "linkedreads-correction-table2", + "value": "linkedreads_corrections_2.txt" + }, + { + "name": "linkedreads-correction-table3", + "value": "linkedreads_corrections_3.txt" + }, + { + "name": "linkedreads-enable", + "value": "false" + }, + { + "name": "logfile_prefix", + "value": "/opt/edico/logs/" + }, + { + "name": "mapper_cigar", + "value": "0" + }, + { + "name": "max_bin_size", + "value": "943718400" + }, + { + "name": "max_ios_inflight", + "value": "1024" + }, + { + "name": "methylation-TAPS", + "value": "false" + }, + { + "name": "methylation-generate-cytosine-report", + "value": "false" + }, + { + "name": "methylation-generate-mbias-report", + "value": "false" + }, + { + "name": "methylation-match-bismark", + "value": "false" + }, + { + "name": "methylation-protocol", + "value": "none" + }, + { + "name": "methylation-reports-only", + "value": "false" + }, + { + "name": "min-predicted-output-gb", + "value": "200" + }, + { + "name": "multiplier", + "value": "1" + }, + { + "name": "no-reset", + "value": "false" + }, + { + "name": "output-directory", + "value": "tiny.v8" + }, + { + "name": "output-format", + "value": "bam" + }, + { + "name": "pair-by-name", + "value": "true" + }, + { + "name": "pair-suffix-delimiter", + "value": "/" + }, + { + "name": "partition-on-compression-bottleneck", + "value": "false" + }, + { + "name": "pe-stats-continuous-update", + "value": "false" + }, + { + "name": "pe-stats-interval-delay", + "value": "5" + }, + { + "name": "pe-stats-interval-memory", + "value": "10" + }, + { + "name": "pe-stats-interval-size", + "value": "25000" + }, + { + "name": "pe-stats-sample-size", + "value": "100000" + }, + { + "name": "pe-stats-update-log-only", + "value": "false" + }, + { + "name": "preserve-bqsr-tags", + "value": "false" + }, + { + "name": "preserve-map-align-order", + "value": "false" + }, + { + "name": "qc-indel-denovo-quality-threshold", + "value": "0.02" + }, + { + "name": "qc-snp-denovo-quality-threshold", + "value": "0.050000000000000003" + }, + { + "name": "read-trimmers", + "value": "none" + }, + { + "name": "recordset-memory", + "value": "1073741824" + }, + { + "name": "reg_errors_are_warnings", + "value": "0" + }, + { + "name": "remove-duplicates", + "value": "false" + }, + { + "name": "repeat-genotype-enable", + "value": "false" + }, + { + "name": "repeat-genotype-min-anchor-mapq", + "value": "60" + }, + { + "name": "repeat-genotype-min-baseq", + "value": "20" + }, + { + "name": "repeat-genotype-min-score", + "value": "0.90000000000000002" + }, + { + "name": "repeat-genotype-read-depth", + "value": "0" + }, + { + "name": "repeat-genotype-read-length", + "value": "0" + }, + { + "name": "repeat-genotype-region-extension-length", + "value": "1000" + }, + { + "name": "repeat-genotype-skip-unaligned", + "value": "true" + }, + { + "name": "repeat-genotype-specs", + "value": "" + }, + { + "name": "rna-ann-sj-min-len", + "value": "6" + }, + { + "name": "rna-cv-min-expression", + "value": "0" + }, + { + "name": "rna-gf-aggressive-filters", + "value": "false" + }, + { + "name": "rna-gf-blast-pairs", + "value": "" + }, + { + "name": "rna-gf-coverage-lookup-window", + "value": "1000" + }, + { + "name": "rna-gf-enriched-genes", + "value": "" + }, + { + "name": "rna-gf-enriched-only", + "value": "false" + }, + { + "name": "rna-gf-exon-snap", + "value": "50" + }, + { + "name": "rna-gf-mate-overhang", + "value": "8" + }, + { + "name": "rna-gf-max-partners", + "value": "3" + }, + { + "name": "rna-gf-merge-calls", + "value": "true" + }, + { + "name": "rna-gf-min-alt-to-ref", + "value": "0.00999999978" + }, + { + "name": "rna-gf-min-anchor", + "value": "12" + }, + { + "name": "rna-gf-min-blast-pairs-eval", + "value": "1e-100" + }, + { + "name": "rna-gf-min-breakpoint-mapq", + "value": "20" + }, + { + "name": "rna-gf-min-cis-distance", + "value": "200000" + }, + { + "name": "rna-gf-min-covered-bases", + "value": "125" + }, + { + "name": "rna-gf-min-covered-bases-uncaptured", + "value": "79" + }, + { + "name": "rna-gf-min-neighbor-dist", + "value": "15" + }, + { + "name": "rna-gf-min-score", + "value": "0.5" + }, + { + "name": "rna-gf-min-score-ratio", + "value": "0.150000006" + }, + { + "name": "rna-gf-min-support", + "value": "2" + }, + { + "name": "rna-gf-min-support-be", + "value": "10" + }, + { + "name": "rna-gf-min-unique-alignments", + "value": "2" + }, + { + "name": "rna-gf-num-threads", + "value": "4" + }, + { + "name": "rna-gf-ref-anchor", + "value": "8" + }, + { + "name": "rna-gf-restrict-genes", + "value": "true" + }, + { + "name": "rna-gf-score-model", + "value": "" + }, + { + "name": "rna-mapq-unique", + "value": "0" + }, + { + "name": "rna-quantification-fld-max", + "value": "1000" + }, + { + "name": "rna-quantification-fld-mean", + "value": "250" + }, + { + "name": "rna-quantification-fld-sd", + "value": "25" + }, + { + "name": "rna-quantification-full-concordance", + "value": "false" + }, + { + "name": "rna-quantification-gc-bias", + "value": "true" + }, + { + "name": "rna-quantification-inference-max", + "value": "10000" + }, + { + "name": "rna-quantification-inference-min", + "value": "100" + }, + { + "name": "rna-quantification-init-uniform", + "value": "0" + }, + { + "name": "rna-quantification-library-type", + "value": "A" + }, + { + "name": "rna-quantification-tlen-min", + "value": "500" + }, + { + "name": "rna-quantification-use-em", + "value": "0" + }, + { + "name": "rna_aligner_buffer_size", + "value": "67108864" + }, + { + "name": "rna_aligner_buffers", + "value": "0" + }, + { + "name": "s2c_dbam_block_size", + "value": "65536" + }, + { + "name": "s2c_dbam_pool_size", + "value": "32" + }, + { + "name": "s2c_decomp_block_size", + "value": "262144" + }, + { + "name": "s2c_decomp_pool_size", + "value": "64" + }, + { + "name": "s2c_graph_block_size", + "value": "16384" + }, + { + "name": "s2c_graph_pool_size", + "value": "256" + }, + { + "name": "s2c_hmm_block_size", + "value": "16384" + }, + { + "name": "s2c_hmm_pool_size", + "value": "16384" + }, + { + "name": "s2c_phase1_packet_size", + "value": "16384" + }, + { + "name": "s2c_phase2_packet_size", + "value": "16384" + }, + { + "name": "s2c_smw_block_size", + "value": "16384" + }, + { + "name": "s2c_smw_pool_size", + "value": "2048" + }, + { + "name": "single-cell-barcode", + "value": "16" + }, + { + "name": "single-cell-count-introns", + "value": "false" + }, + { + "name": "single-cell-global-umi", + "value": "false" + }, + { + "name": "single-cell-type", + "value": "simple" + }, + { + "name": "single-cell-umi", + "value": "8" + }, + { + "name": "soft-read-trimmers", + "value": "polyg" + }, + { + "name": "sort_buffer_size", + "value": "1048576" + }, + { + "name": "stop-at-read", + "value": "0" + }, + { + "name": "strip-input-qname-suffixes", + "value": "true" + }, + { + "name": "sv-denovo-threshold", + "value": "20" + }, + { + "name": "sv-enable-rrm-for-insertions-in-cancer-calling-modes", + "value": "true" + }, + { + "name": "sv-enable-rrm-for-insertions-in-germline-calling-modes", + "value": "true" + }, + { + "name": "sv-generate-evidence-bam", + "value": "false" + }, + { + "name": "sv-graph-node-max-edge-count", + "value": "10" + }, + { + "name": "sv-hyper-sensitivity", + "value": "false" + }, + { + "name": "sv-min-candidate-spanning-count", + "value": "3" + }, + { + "name": "sv-min-candidate-variant-size", + "value": "8" + }, + { + "name": "sv-min-diploid-variant-score", + "value": "10" + }, + { + "name": "sv-min-edge-observations", + "value": "3" + }, + { + "name": "sv-min-pass-diploid-gt-score", + "value": "15" + }, + { + "name": "sv-min-pass-diploid-variant-score", + "value": "20" + }, + { + "name": "sv-min-pass-somatic-score", + "value": "30" + }, + { + "name": "sv-min-scored-variant-size", + "value": "50" + }, + { + "name": "sv-min-somatic-score", + "value": "10" + }, + { + "name": "sv-mobile-element-sequences", + "value": "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACGAGGTCAGGAGATCGAGACCATCCCGGCTAAAACGGTGAAACCCCGTCTCTACTAAAAATACAAAAAATTAGCCGGGCGTAGTGGCGGGCGCCTGTAGTCCCAGCTACTTGGGAGGCTGAGGCAGGAGAATGGCGTGAACCCGGGAGGCGGAGCTTGCAGTGAGCCGAGATCCCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTC GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGTGGATCATGAGGTCAGGAGATCGAGACCATCCTGGCTAACAAGGTGAAACCCCGTCTCTACTAAAAATACAAAAAATTAGCCGGGCGCGGTGGCGGGCGCCTGTAGTCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATGGCGTGAACCCGGGAAGCGGAGCTTGCAGTGAGCCGAGATTGCGCCACTGCAGTCCGCAGTCCGGCCTGGGCGACAGAGCGAGACTCCGTCTC " + }, + { + "name": "sv-rna-min-candidate-variant-size", + "value": "1000" + }, + { + "name": "sv-use-overlap-pair-evidence", + "value": "false" + }, + { + "name": "trim-disable-mapping", + "value": "false" + }, + { + "name": "trim-filter-dummy-len", + "value": "10" + }, + { + "name": "trim-filter-set-flag", + "value": "true" + }, + { + "name": "trim-min-len-read1", + "value": "20" + }, + { + "name": "trim-min-len-read2", + "value": "20" + }, + { + "name": "trim-polyg-early-exit-threshold", + "value": "-500" + }, + { + "name": "trim-polyg-g-score-r1-3prime", + "value": "15" + }, + { + "name": "trim-polyg-g-score-r1-5prime", + "value": "0" + }, + { + "name": "trim-polyg-g-score-r2-3prime", + "value": "15" + }, + { + "name": "trim-polyg-g-score-r2-5prime", + "value": "0" + }, + { + "name": "trim-polyg-kmer-len", + "value": "25" + }, + { + "name": "trim-polyg-kmer-non-g", + "value": "2" + }, + { + "name": "trim-polyg-min-trim-r1-3prime", + "value": "6" + }, + { + "name": "trim-polyg-min-trim-r1-5prime", + "value": "6" + }, + { + "name": "trim-polyg-min-trim-r2-3prime", + "value": "6" + }, + { + "name": "trim-polyg-min-trim-r2-5prime", + "value": "6" + }, + { + "name": "umi-base-representation-min-ratio", + "value": "0.5" + }, + { + "name": "umi-correction-scheme", + "value": "lookup" + }, + { + "name": "umi-correction-table", + "value": "umi_correction_table.txt" + }, + { + "name": "umi-enable", + "value": "false" + }, + { + "name": "umi-enable-contextual-corrections", + "value": "true" + }, + { + "name": "umi-enable-duplex-merging", + "value": "true" + }, + { + "name": "umi-enable-probability-model-merging", + "value": "false" + }, + { + "name": "umi-enable-shift-corrections", + "value": "true" + }, + { + "name": "umi-enable-trimming", + "value": "true" + }, + { + "name": "umi-end-mask-length", + "value": "3" + }, + { + "name": "umi-generate-bam-tags", + "value": "true" + }, + { + "name": "umi-masked-base-qual", + "value": "1" + }, + { + "name": "umi-max-base-quality", + "value": "63" + }, + { + "name": "umi-mem-throttle-gb", + "value": "30" + }, + { + "name": "umi-min-map-quality", + "value": "0" + }, + { + "name": "umi-min-reads-per-region", + "value": "4096" + }, + { + "name": "umi-min-supporting-reads", + "value": "2" + }, + { + "name": "umi-padding", + "value": "A" + }, + { + "name": "umi-preserve-input-tags", + "value": "false" + }, + { + "name": "umi-probability-merging-duplex-merging-thres", + "value": "1" + }, + { + "name": "umi-probability-merging-max-transition-ratio", + "value": "10000" + }, + { + "name": "umi-probability-merging-min-isize-freq", + "value": "0.001" + }, + { + "name": "umi-probability-merging-seq-error", + "value": "0.001" + }, + { + "name": "umi-probability-merging-simplex-fuzzy-merging-thres", + "value": "1" + }, + { + "name": "umi-probability-merging-simplex-merging-thres", + "value": "1" + }, + { + "name": "umi-random-merge-factor", + "value": "2" + }, + { + "name": "umi-read-minority-min-ratio", + "value": "0.5" + }, + { + "name": "umi-soft-clip-ratio", + "value": "0.5" + }, + { + "name": "umi-source", + "value": "qname" + }, + { + "name": "umi-start-mask-length", + "value": "1" + }, + { + "name": "umi-stat-estimation-max-fragment-count", + "value": "30" + }, + { + "name": "umi-stat-estimation-max-fragment-size", + "value": "1000" + }, + { + "name": "umi-stat-estimation-max-interval-number", + "value": "50" + }, + { + "name": "umi-stat-estimation-min-probability-unique-fragment", + "value": "0.998" + }, + { + "name": "umi-stat-estimation-umi-jumping-estimation-method", + "value": "simplex" + }, + { + "name": "umi-trim-allowed-mismatches", + "value": "1" + }, + { + "name": "umi-verbose-metrics", + "value": "false" + }, + { + "name": "use-mock-config", + "value": "false" + }, + { + "name": "vc-active-only", + "value": "false" + }, + { + "name": "vc-decoy-contigs", + "value": "NC_007605 hs37d5 chrUn_KN707*v1_decoy chrUn_JTFH0100*v1_decoy KN707*.1 JTFH0100*.1 chrEBV CMV HBV HCV* HIV* KSHV HTLV* MCV SV40 HPV* " + }, + { + "name": "vc-emit-ref-confidence", + "value": "NONE" + }, + { + "name": "vc-emit-zero-coverage-intervals", + "value": "true" + }, + { + "name": "vc-enable-basecall-filter", + "value": "false" + }, + { + "name": "vc-enable-deterministic-run", + "value": "true" + }, + { + "name": "vc-enable-hw-hmm", + "value": "true" + }, + { + "name": "vc-enable-hw-hmm-dump-receiver-data", + "value": "false" + }, + { + "name": "vc-enable-hw-hmm-dump-sender-data", + "value": "false" + }, + { + "name": "vc-enable-hw-hmm-dump-worker-data", + "value": "false" + }, + { + "name": "vc-enable-hw-smw", + "value": "true" + }, + { + "name": "vc-enable-hw-smw-dump-receiver-data", + "value": "false" + }, + { + "name": "vc-enable-hw-smw-dump-sender-data", + "value": "false" + }, + { + "name": "vc-enable-hw-smw-dump-worker-data", + "value": "false" + }, + { + "name": "vc-hw-hmm-timeout", + "value": "100000" + }, + { + "name": "vc-limit-genomecov-output", + "value": "false" + }, + { + "name": "vc-max-alternate-alleles", + "value": "6" + }, + { + "name": "vc-max-haps-per-job", + "value": "2" + }, + { + "name": "vc-sw-cell-width", + "value": "16" + }, + { + "name": "vc-sw-instruction-set", + "value": "sse2" + }, + { + "name": "vc-sw-mode", + "value": "0" + }, + { + "name": "vqsr-lod-cutoff", + "value": "-5.0" + }, + { + "name": "vqsr-num-gaussians", + "value": "8,2,4,2" + }, + { + "name": "watchdog-active-timeout", + "value": "600" + }, + { + "name": "watchdog-dump-processes", + "value": "true;" + }, + { + "name": "watchdog-exit-on-hang", + "value": "true" + }, + { + "name": "watchdog-freemem-threshold", + "value": "2GB" + }, + { + "name": "watchdog-idle-timeout", + "value": "600" + }, + { + "name": "watchdog-max-threads", + "value": "8388608" + }, + { + "name": "watchdog-poll-interval", + "value": "1" + }, + { + "name": "watchdog-resources-monitored", + "value": "THREADS IO MEMORY " + }, + { + "name": "watchdog-verbose-logging", + "value": "true" } + ], + "system": { + "dragen_version": "07.021.572.3.7.0-205-gbcf57830", + "nodename": "ussd-tst-drgn33.illumina.com", + "kernel_release": "3.10.0-1062.12.1.el7.x86_64" + } } diff --git a/doc/developer.md b/doc/developer.md index 94a1109..9d0837f 100644 --- a/doc/developer.md +++ b/doc/developer.md @@ -1,14 +1,13 @@ - # Directory structure ## Top level Only the strict minimum: -* README.md -* LICENSE and COPYRIGHT -* Makefile -* config.mk +- README.md +- LICENSE and COPYRIGHT +- Makefile +- config.mk ## make subdirectory @@ -18,9 +17,9 @@ All the included makefiles for the build system All aspects of the documentation: -* usage -* developer -* method +- usage +- developer +- method ## make subdirectory @@ -47,9 +46,10 @@ See "src/include/common/Exceptions.hh" and "src/lib/common/Exceptions.cpp" for e Style preference: currently there is a mixture of many difference styles but the preference should be towards camelCase vesus underscore_separated with: -* UpperCamelCase for Classes -* lowerCamelCase for methods, functions, parameter names and local variables -* m_UpperCameLCase for class members + +- UpperCamelCase for Classes +- lowerCamelCase for methods, functions, parameter names and local variables +- m_UpperCameLCase for class members # Build system @@ -63,11 +63,11 @@ tests and "install.mk" just once. The lists of programs, libraries, compilation units (CU) and integration tests are discovered with glob expressions: -* programs: "*.cpp" -* libraries: "src/lib/*/*.cpp" then using the unique paths. The first wiltcard is used for the library name. The +- programs: "\*.cpp" +- libraries: "src/lib/_/_.cpp" then using the unique paths. The first wiltcard is used for the library name. The second wildcard is used for the list of compilation units for each library. -* unit tests: "src/lib/$(library)/tests/*Fixture.cpp" -* integration tests: "tests/integration/*.cpp" +- unit tests: "src/lib/$(library)/tests/\*Fixture.cpp" +- integration tests: "tests/integration/\*.cpp" The build output is either the subdirectory "build/release" or "build/debug", depending on the build type. @@ -79,7 +79,7 @@ the dependencies - when the list of goals explicitly contains only generis targe ## Programs Compilation with DEPFLAGS, CPPFLAGS and CXXFLAGS. Linking with CPPFLAGS, CXXFLAGS and LDFLAGS. There is a dummy -empty target for the dependency file for each program. There is an include of the corresponding dependency file +empty target for the dependency file for each program. There is an include of the corresponding dependency file for each program. DEPFLAGS are just the usual macro "-MT $@ -MMD -MP -MF $(@:%.o=%.Td)" to generate the dependencies. Note that @@ -89,7 +89,7 @@ generation of the dependencies. CPPFLAGS specify c++11, all warnings, add boost as additional system include path, optimization and debug level. -CXXFLAGS controls the outpu of the compiler depending on the build type. In debug mode, it would only adding the +CXXFLAGS controls the outpu of the compiler depending on the build type. In debug mode, it would only adding the sanitize (address, leak and undefined), coverage and profiling information. In release mode, it would be all the meaningful optimizations. @@ -108,7 +108,7 @@ Each library include "make/gtest.mk" for the optional unit tests associated to e ## Unit tests -The unit tests are built and run in isolation for each CU. The main program to execute the unit tests is +The unit tests are built and run in isolation for each CU. The main program to execute the unit tests is the default provided in libgteest_main. The unit tests are for each CU - they are optional though. For a CU CompilationUnit, three files are expected: @@ -119,18 +119,17 @@ generate a wrapper "CompilationUnitWrapper.cpp": #include "CompilationUnit.cpp" Both "CompilationUnitFixture.cpp" and "CompilationUnitWrapper.cpp" are compiled and then linked to "testRunner.o" -to create "testCompilationUnit" which is then executed to create "passedCompilationUnit" which is then added to +to create "testCompilationUnit" which is then executed to create "passedCompilationUnit" which is then added to the list of dependencies for the library. # Testing ## Integration tests -Each integration test is compiled and linked as the programs are. +Each integration test is compiled and linked as the programs are. TODO: add support for the automation of the execution of these tests. ## Acceptance tests Not supported - diff --git a/doc/readme.md b/doc/readme.md index b2db93b..6e82bec 100644 --- a/doc/readme.md +++ b/doc/readme.md @@ -1,20 +1,18 @@ -# Dragen mapper/aligner - software version +# Dragen mapper/aligner - software version -## Basic command line usage +## Basic command line usage -### Command line options +### Command line options dragen-os --help - -### Build hash table of a reference fasta file +### Build hash table of a reference fasta file dragen-os --build-hash-table true --ht-reference reference.fasta --output-directory /home/data/reference/ - ### Align paired-end reads : -Output result to standard output +Output result to standard output dragen-os -r /home/data/reference/ -1 reads_1.fastq.gz -2 reads_2.fastq.gz > result.sam @@ -26,35 +24,32 @@ Or directly to a file : dragen-os -r /home/data/reference/ -1 reads_1.fastq.gz > result.sam - - ## Requirements -The binary was built using Centos 7. +The binary was built using Centos 7. It requires the following dynamic libraries to run : - ldd dragen-os - linux-vdso.so.1 => (0x00007ffe643f3000) - libboost_system.so.1.53.0 => /lib64/libboost_system.so.1.53.0 (0x00007f9a7c88e000) - libboost_filesystem.so.1.53.0 => /lib64/libboost_filesystem.so.1.53.0 (0x00007f9a7c677000) - libboost_date_time.so.1.53.0 => /lib64/libboost_date_time.so.1.53.0 (0x00007f9a7c466000) - libboost_thread-mt.so.1.53.0 => /lib64/libboost_thread-mt.so.1.53.0 (0x00007f9a7c24f000) - libboost_system-mt.so.1.53.0 => /lib64/libboost_system-mt.so.1.53.0 (0x00007f9a7c04b000) - libboost_iostreams.so.1.53.0 => /lib64/libboost_iostreams.so.1.53.0 (0x00007f9a7be31000) - libboost_regex.so.1.53.0 => /lib64/libboost_regex.so.1.53.0 (0x00007f9a7bb2e000) - libboost_program_options.so.1.53.0 => /lib64/libboost_program_options.so.1.53.0 (0x00007f9a7b8bc000) - libz.so.1 => /lib64/libz.so.1 (0x00007f9a7b6a6000) - libstdc++.so.6 => /lib64/libstdc++.so.6 (0x00007f9a7b39f000) - librt.so.1 => /lib64/librt.so.1 (0x00007f9a7b197000) - libgomp.so.1 => /lib64/libgomp.so.1 (0x00007f9a7af71000) - libpthread.so.0 => /lib64/libpthread.so.0 (0x00007f9a7ad55000) - libm.so.6 => /lib64/libm.so.6 (0x00007f9a7aa53000) - libgcc_s.so.1 => /lib64/libgcc_s.so.1 (0x00007f9a7a83d000) - libc.so.6 => /lib64/libc.so.6 (0x00007f9a7a46f000) - libbz2.so.1 => /lib64/libbz2.so.1 (0x00007f9a7a25f000) - libicuuc.so.50 => /lib64/libicuuc.so.50 (0x00007f9a79ee6000) - libicui18n.so.50 => /lib64/libicui18n.so.50 (0x00007f9a79ae7000) - libicudata.so.50 => /lib64/libicudata.so.50 (0x00007f9a78514000) - /lib64/ld-linux-x86-64.so.2 (0x00007f9a7ca92000) - libdl.so.2 => /lib64/libdl.so.2 (0x00007f9a78310000) - + ldd dragen-os + linux-vdso.so.1 => (0x00007ffe643f3000) + libboost_system.so.1.53.0 => /lib64/libboost_system.so.1.53.0 (0x00007f9a7c88e000) + libboost_filesystem.so.1.53.0 => /lib64/libboost_filesystem.so.1.53.0 (0x00007f9a7c677000) + libboost_date_time.so.1.53.0 => /lib64/libboost_date_time.so.1.53.0 (0x00007f9a7c466000) + libboost_thread-mt.so.1.53.0 => /lib64/libboost_thread-mt.so.1.53.0 (0x00007f9a7c24f000) + libboost_system-mt.so.1.53.0 => /lib64/libboost_system-mt.so.1.53.0 (0x00007f9a7c04b000) + libboost_iostreams.so.1.53.0 => /lib64/libboost_iostreams.so.1.53.0 (0x00007f9a7be31000) + libboost_regex.so.1.53.0 => /lib64/libboost_regex.so.1.53.0 (0x00007f9a7bb2e000) + libboost_program_options.so.1.53.0 => /lib64/libboost_program_options.so.1.53.0 (0x00007f9a7b8bc000) + libz.so.1 => /lib64/libz.so.1 (0x00007f9a7b6a6000) + libstdc++.so.6 => /lib64/libstdc++.so.6 (0x00007f9a7b39f000) + librt.so.1 => /lib64/librt.so.1 (0x00007f9a7b197000) + libgomp.so.1 => /lib64/libgomp.so.1 (0x00007f9a7af71000) + libpthread.so.0 => /lib64/libpthread.so.0 (0x00007f9a7ad55000) + libm.so.6 => /lib64/libm.so.6 (0x00007f9a7aa53000) + libgcc_s.so.1 => /lib64/libgcc_s.so.1 (0x00007f9a7a83d000) + libc.so.6 => /lib64/libc.so.6 (0x00007f9a7a46f000) + libbz2.so.1 => /lib64/libbz2.so.1 (0x00007f9a7a25f000) + libicuuc.so.50 => /lib64/libicuuc.so.50 (0x00007f9a79ee6000) + libicui18n.so.50 => /lib64/libicui18n.so.50 (0x00007f9a79ae7000) + libicudata.so.50 => /lib64/libicudata.so.50 (0x00007f9a78514000) + /lib64/ld-linux-x86-64.so.2 (0x00007f9a7ca92000) + libdl.so.2 => /lib64/libdl.so.2 (0x00007f9a78310000) diff --git a/flake.nix b/flake.nix index ae512a7..da0b6d8 100644 --- a/flake.nix +++ b/flake.nix @@ -25,10 +25,7 @@ perSystem = { config, - self', - inputs', pkgs, - system, ... }: { # Per-system attributes can be defined here. The self' and inputs' diff --git a/src/compare.cpp b/src/compare.cpp index 9203eb7..be54d57 100644 --- a/src/compare.cpp +++ b/src/compare.cpp @@ -1,4 +1,8 @@ #include +#include +#include +#include +#include #include #include #include @@ -12,11 +16,6 @@ #include #include -#include -#include -#include -#include - class Sam { public: Sam& operator=(const std::string& line); diff --git a/src/include/align/Aligner.hpp b/src/include/align/Aligner.hpp index 7a7ab31..8888f4c 100644 --- a/src/include/align/Aligner.hpp +++ b/src/include/align/Aligner.hpp @@ -28,8 +28,8 @@ namespace align { class Aligner { public: - Aligner() = delete; - Aligner(const Aligner&) = delete; + Aligner() = delete; + Aligner(const Aligner&) = delete; Aligner& operator=(const Aligner&) = delete; // Aligner(Aligner&&) = delete; //Aligner &operator=(Aligner&&) = delete; diff --git a/src/include/align/Alignment.hpp b/src/include/align/Alignment.hpp index 1dee1e8..39d416b 100644 --- a/src/include/align/Alignment.hpp +++ b/src/include/align/Alignment.hpp @@ -193,11 +193,10 @@ class Alignment : public AlignmentHeader { int64_t getUnclippedAlignmentCoordinate() const { - const int64_t offset = isUnmapped() - ? 0 - : (isReverseComplement()) ? (getCigar().getReferenceLengthPlusEndClips() - 1) - : -int64_t(getCigar().countStartClips()); - int64_t result = static_cast(getPosition()) + offset; + const int64_t offset = isUnmapped() ? 0 + : (isReverseComplement()) ? (getCigar().getReferenceLengthPlusEndClips() - 1) + : -int64_t(getCigar().countStartClips()); + int64_t result = static_cast(getPosition()) + offset; return result; } diff --git a/src/include/align/AlignmentRescue.hpp b/src/include/align/AlignmentRescue.hpp index e560e38..895a94c 100644 --- a/src/include/align/AlignmentRescue.hpp +++ b/src/include/align/AlignmentRescue.hpp @@ -16,8 +16,10 @@ #define ALIGN_ALIGNMENT_RESCUE_HPP #include + #include #include + #include "align/Alignment.hpp" #include "align/InsertSizeParameters.hpp" #include "map/ChainBuilder.hpp" @@ -207,12 +209,10 @@ class AlignmentRescue { **/ bool isReversedRescue(const SeedChain& anchoredChain) const { - return (pe_orientation_ == pe_orient_fr_c) - ? (!anchoredChain.isReverseComplement()) - : (pe_orientation_ == pe_orient_rf_c) - ? (!anchoredChain.isReverseComplement()) - : (pe_orientation_ == pe_orient_ff_c) ? (false) - : /* (pe_orientation_==pe_orient_rr_c) ? */ (true); + return (pe_orientation_ == pe_orient_fr_c) ? (!anchoredChain.isReverseComplement()) + : (pe_orientation_ == pe_orient_rf_c) ? (!anchoredChain.isReverseComplement()) + : (pe_orientation_ == pe_orient_ff_c) ? (false) + : /* (pe_orientation_==pe_orient_rr_c) ? */ (true); } #if 0 diff --git a/src/include/align/CalculateRefStartEnd.hpp b/src/include/align/CalculateRefStartEnd.hpp index 1980967..7945388 100644 --- a/src/include/align/CalculateRefStartEnd.hpp +++ b/src/include/align/CalculateRefStartEnd.hpp @@ -16,6 +16,7 @@ #define ALIGN_CALCULATE_REF_START_END_HPP #include + #include "map/SeedChain.hpp" #include "sequences/Read.hpp" diff --git a/src/include/align/Cigar.hpp b/src/include/align/Cigar.hpp index 1ea0f23..6d9b6a6 100644 --- a/src/include/align/Cigar.hpp +++ b/src/include/align/Cigar.hpp @@ -15,12 +15,11 @@ #ifndef ALIGN_CIGAR_HPP #define ALIGN_CIGAR_HPP -#include -#include - #include #include #include +#include +#include #include "common/Debug.hpp" diff --git a/src/include/align/InsertSizeDistribution.hpp b/src/include/align/InsertSizeDistribution.hpp index 167c8fa..80a459a 100644 --- a/src/include/align/InsertSizeDistribution.hpp +++ b/src/include/align/InsertSizeDistribution.hpp @@ -16,7 +16,6 @@ #define ALIGN_INSERT_SIZE_DISTRIBUTION_HPP #include "align/InsertSizeParameters.hpp" - #include "host/dragen_api/sampling/readgroup_insert_stats.hpp" namespace dragenos { diff --git a/src/include/align/InsertSizeParameters.hpp b/src/include/align/InsertSizeParameters.hpp index 4f2a6a9..30a81ec 100644 --- a/src/include/align/InsertSizeParameters.hpp +++ b/src/include/align/InsertSizeParameters.hpp @@ -16,6 +16,7 @@ #define ALIGN_INSERT_SIZE_PARAMETERS_HPP #include + #include #include #include diff --git a/src/include/align/PairBuilder.hpp b/src/include/align/PairBuilder.hpp index 84b3a00..c5db3f2 100644 --- a/src/include/align/PairBuilder.hpp +++ b/src/include/align/PairBuilder.hpp @@ -16,10 +16,10 @@ #define ALIGN_PAIR_BUILDER_HPP #include -#include -#include #include +#include +#include #include "align/AlignmentGenerator.hpp" #include "align/Alignments.hpp" diff --git a/src/include/align/Query.hpp b/src/include/align/Query.hpp index 33c52cc..af41cd9 100644 --- a/src/include/align/Query.hpp +++ b/src/include/align/Query.hpp @@ -45,12 +45,12 @@ class Query : public sequences::Read::Bases // public std::vector(std::forward(args)...), beginOffset_(0) {} Query& operator=(const Bases bases) { - beginOffset_ = 0; + beginOffset_ = 0; Bases::operator=(bases); return *this; } void setBeginOffset(const size_t offset) { beginOffset_ = offset; } -// void incrementBeginOffset(const size_t offset) { ++beginOffset_; } + // void incrementBeginOffset(const size_t offset) { ++beginOffset_; } // reimplement the begin method to support an offset //auto begin() const -> decltype(begin()) {return begin() + beginOffset_;} private: diff --git a/src/include/align/SinglePicker.hpp b/src/include/align/SinglePicker.hpp index b403cc1..35592a7 100644 --- a/src/include/align/SinglePicker.hpp +++ b/src/include/align/SinglePicker.hpp @@ -16,10 +16,10 @@ #define ALIGN_SINGLE_PICKER_HPP #include -#include -#include #include +#include +#include #include "align/Alignments.hpp" #include "align/InsertSizeParameters.hpp" diff --git a/src/include/align/VectorSmithWaterman.hpp b/src/include/align/VectorSmithWaterman.hpp index 73056d2..864df29 100644 --- a/src/include/align/VectorSmithWaterman.hpp +++ b/src/include/align/VectorSmithWaterman.hpp @@ -15,13 +15,12 @@ #ifndef ALIGN_VECTOR_SMITH_WATERMAN_HPP #define ALIGN_VECTOR_SMITH_WATERMAN_HPP -#include "align/SimilarityScores.hpp" -#include "common/DragenLogger.hpp" -#include "ssw/ssw.hpp" - #include "align/Alignment.hpp" #include "align/Database.hpp" #include "align/Query.hpp" +#include "align/SimilarityScores.hpp" +#include "common/DragenLogger.hpp" +#include "ssw/ssw.hpp" namespace dragenos { namespace align { @@ -67,7 +66,10 @@ class VectorSmithWaterman { #endif } - ~VectorSmithWaterman() { free(sswScoringMat_); } + ~VectorSmithWaterman() + { + free(sswScoringMat_); + } uint16_t align( const unsigned char* queryBegin, diff --git a/src/include/bam/Bam.hpp b/src/include/bam/Bam.hpp index 74b2255..9f34a98 100644 --- a/src/include/bam/Bam.hpp +++ b/src/include/bam/Bam.hpp @@ -14,6 +14,7 @@ #pragma once #include + #include namespace dragenos { diff --git a/src/include/bam/BamBlockReader.hpp b/src/include/bam/BamBlockReader.hpp index 565c512..14427b5 100644 --- a/src/include/bam/BamBlockReader.hpp +++ b/src/include/bam/BamBlockReader.hpp @@ -12,11 +12,10 @@ ** **/ +#include #include #include -#include - #include "bam/Bam.hpp" #include "common/Debug.hpp" #include "common/Exceptions.hpp" diff --git a/src/include/common/Debug.hpp b/src/include/common/Debug.hpp index cb8848e..701157a 100644 --- a/src/include/common/Debug.hpp +++ b/src/include/common/Debug.hpp @@ -16,11 +16,10 @@ #define COMMON_DEBUG_HPP #include +#include #include #include #include - -#include //#include #include #include diff --git a/src/include/common/Threads.hpp b/src/include/common/Threads.hpp index 34b027d..7057090 100644 --- a/src/include/common/Threads.hpp +++ b/src/include/common/Threads.hpp @@ -78,7 +78,7 @@ class unlock_guard { Lock& l; public: - unlock_guard(unlock_guard&) = delete; + unlock_guard(unlock_guard&) = delete; unlock_guard& operator=(unlock_guard&) = delete; explicit unlock_guard(Lock& m_) : l(m_) { l.unlock(); } diff --git a/src/include/fastq/FastqBlockReader.hpp b/src/include/fastq/FastqBlockReader.hpp index cef86f7..9b20974 100644 --- a/src/include/fastq/FastqBlockReader.hpp +++ b/src/include/fastq/FastqBlockReader.hpp @@ -12,11 +12,10 @@ ** **/ +#include #include #include -#include - namespace dragenos { namespace fastq { diff --git a/src/include/fastq/FastqNRecordReader.hpp b/src/include/fastq/FastqNRecordReader.hpp index 25dc79d..d765418 100644 --- a/src/include/fastq/FastqNRecordReader.hpp +++ b/src/include/fastq/FastqNRecordReader.hpp @@ -13,11 +13,10 @@ **/ #pragma once +#include #include #include -#include - #include "common/Debug.hpp" namespace dragenos { diff --git a/src/include/fastq/Token.hpp b/src/include/fastq/Token.hpp index 52f438d..41001ae 100644 --- a/src/include/fastq/Token.hpp +++ b/src/include/fastq/Token.hpp @@ -120,8 +120,10 @@ class BasicToken { { return os << "BasicToken(" << std::string(token.headerBegin_, token.headerEnd_) << " " << std::string(token.baseCallsBegin_, token.baseCallsEnd_) << " " - << std::string(token.qScoresBegin_, token.end_) - << "):" << (token.empty() ? "empty" : token.valid() ? "valid" : "invalid"); + << std::string(token.qScoresBegin_, token.end_) << "):" + << (token.empty() ? "empty" + : token.valid() ? "valid" + : "invalid"); } }; diff --git a/src/include/map/Mapper.hpp b/src/include/map/Mapper.hpp index d3b3942..3c6541f 100644 --- a/src/include/map/Mapper.hpp +++ b/src/include/map/Mapper.hpp @@ -16,9 +16,8 @@ #define MAP_MAPPER_HPP #include -#include - #include +#include #include "BestIntervalTracker.hpp" #include "common/Exceptions.hpp" diff --git a/src/include/map/SeedPosition.hpp b/src/include/map/SeedPosition.hpp index 30a7d47..6c1e5e6 100644 --- a/src/include/map/SeedPosition.hpp +++ b/src/include/map/SeedPosition.hpp @@ -31,7 +31,7 @@ class SeedPosition { } ReferencePosition getReferencePosition() const { return position_; } const sequences::Seed& getSeed() const { return seed_; } - unsigned getHalfExtension() const { return halfExtension_; } + unsigned getHalfExtension() const { return halfExtension_; } /// Projected reference position of the leftmost base of the read ReferencePosition getFirstProjection(bool reverseComplement) const; /// Projected reference position of the rightmost base of the read diff --git a/src/include/reference/Bucket.hpp b/src/include/reference/Bucket.hpp index 0c3fcdd..abc0dd6 100644 --- a/src/include/reference/Bucket.hpp +++ b/src/include/reference/Bucket.hpp @@ -16,6 +16,7 @@ #define REFERENCE_BUCKET_HPP #include + #include "reference/HashRecord.hpp" #include "reference/HashtableTraits.hpp" diff --git a/src/include/reference/ReferenceSequence.hpp b/src/include/reference/ReferenceSequence.hpp index e5b0626..96ca696 100644 --- a/src/include/reference/ReferenceSequence.hpp +++ b/src/include/reference/ReferenceSequence.hpp @@ -199,8 +199,14 @@ class ReferenceSequence { return getRcBaseNoCheck(position); } - const unsigned char* getData() const { return data_; } - size_t getSize() const { return size_; } + const unsigned char* getData() const + { + return data_; + } + size_t getSize() const + { + return size_; + } /// decode 4 bits into AIUPAC character using only 4 LSB static char decodeBase(unsigned char base); /// translate into 2 bits encoding using only 4 LSB @@ -228,22 +234,23 @@ class ReferenceSequence { inline unsigned char getRcBaseNoCheck(size_t position) const { unsigned char b = getBaseNoCheck(position); - const static std::array translate{0b0000, // 0b0000 - 0b1000, // 0b0001 - 0b0100, // 0b0010 - 0b1100, // 0b0011 - 0b0010, // 0b0100 - 0b1010, // 0b0101 - 0b0110, // 0b0110 - 0b1110, // 0b0111 - 0b0001, // 0b1000 - 0b1001, // 0b1001 - 0b0101, // 0b1010 - 0b1101, // 0b1011 - 0b0011, // 0b1100 - 0b1011, // 0b1101 - 0b0111, // 0b1110 - 0b1111}; // 0b1111 + const static std::array translate{ + 0b0000, // 0b0000 + 0b1000, // 0b0001 + 0b0100, // 0b0010 + 0b1100, // 0b0011 + 0b0010, // 0b0100 + 0b1010, // 0b0101 + 0b0110, // 0b0110 + 0b1110, // 0b0111 + 0b0001, // 0b1000 + 0b1001, // 0b1001 + 0b0101, // 0b1010 + 0b1101, // 0b1011 + 0b0011, // 0b1100 + 0b1011, // 0b1101 + 0b0111, // 0b1110 + 0b1111}; // 0b1111 return translate[b]; } diff --git a/src/include/sequences/CrcHasher.hpp b/src/include/sequences/CrcHasher.hpp index c7afdc0..67e90aa 100644 --- a/src/include/sequences/CrcHasher.hpp +++ b/src/include/sequences/CrcHasher.hpp @@ -15,12 +15,12 @@ #ifndef SEQUENCES_CRC_HASHER_HPP #define SEQUENCES_CRC_HASHER_HPP +#include + #include #include #include -#include - #include "CrcPolynomial.hpp" namespace dragenos { diff --git a/src/lib/align/Aligner.cpp b/src/lib/align/Aligner.cpp index 89cb7ef..9922d4f 100644 --- a/src/lib/align/Aligner.cpp +++ b/src/lib/align/Aligner.cpp @@ -12,22 +12,23 @@ ** **/ +#include "align/Aligner.hpp" + +#include +#include + #include #include #include #include -#include -#include -#include "common/DragenLogger.hpp" -#include "common/Exceptions.hpp" - -#include "align/Aligner.hpp" #include "align/CalculateRefStartEnd.hpp" #include "align/Mapq.hpp" #include "align/PairBuilder.hpp" #include "align/Pairs.hpp" #include "align/Tlen.hpp" +#include "common/DragenLogger.hpp" +#include "common/Exceptions.hpp" #include "sequences/Seed.hpp" namespace dragenos { diff --git a/src/lib/align/AlignmentGenerator.cpp b/src/lib/align/AlignmentGenerator.cpp index 3460591..316516d 100644 --- a/src/lib/align/AlignmentGenerator.cpp +++ b/src/lib/align/AlignmentGenerator.cpp @@ -13,6 +13,7 @@ **/ #include "align/AlignmentGenerator.hpp" + #include "align/CalculateRefStartEnd.hpp" #include "common/DragenLogger.hpp" diff --git a/src/lib/align/AlignmentRescue.cpp b/src/lib/align/AlignmentRescue.cpp index 396aefb..e60e7ad 100644 --- a/src/lib/align/AlignmentRescue.cpp +++ b/src/lib/align/AlignmentRescue.cpp @@ -12,11 +12,13 @@ ** **/ +#include "align/AlignmentRescue.hpp" + #include + #include #include -#include "align/AlignmentRescue.hpp" #include "common/DragenLogger.hpp" #include "map/SeedPosition.hpp" diff --git a/src/lib/align/Cigar.cpp b/src/lib/align/Cigar.cpp index e5f65f5..0179d71 100644 --- a/src/lib/align/Cigar.cpp +++ b/src/lib/align/Cigar.cpp @@ -21,262 +21,263 @@ namespace dragenos { namespace align { const char Cigar::OPERATION_NAMES[] = {'M', 'I', 'D', 'N', 'S', 'H', 'P', '=', 'X'}; -const Cigar::OperationCode Cigar::OPERATION_CODES[256] = {INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - SEQUENCE_MATCH, - INV, - INV, - INV, - INV, - INV, - INV, - DELETE, - INV, - INV, - INV, - HARD_CLIP, - INSERT, - INV, - INV, - INV, - ALIGNMENT_MATCH, - SKIP, - INV, - PAD, - INV, - INV, - SOFT_CLIP, - INV, - INV, - INV, - INV, - SEQUENCE_MISMATCH, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV, - INV}; +const Cigar::OperationCode Cigar::OPERATION_CODES[256] = { + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + SEQUENCE_MATCH, + INV, + INV, + INV, + INV, + INV, + INV, + DELETE, + INV, + INV, + INV, + HARD_CLIP, + INSERT, + INV, + INV, + INV, + ALIGNMENT_MATCH, + SKIP, + INV, + PAD, + INV, + INV, + SOFT_CLIP, + INV, + INV, + INV, + INV, + SEQUENCE_MISMATCH, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV, + INV}; Cigar::OperationCode Cigar::getOperationCode(const char operationName) { diff --git a/src/lib/align/PairBuilder.cpp b/src/lib/align/PairBuilder.cpp index a2d6522..120f784 100644 --- a/src/lib/align/PairBuilder.cpp +++ b/src/lib/align/PairBuilder.cpp @@ -13,6 +13,7 @@ **/ #include "align/PairBuilder.hpp" + #include "align/AlignmentRescue.hpp" #include "align/SinglePicker.hpp" #include "align/Tlen.hpp" @@ -348,9 +349,9 @@ void PairBuilder::updateEndMapq( const bool mapq0 = (aln_cfg_sample_mapq0_ >= 1 && best->hasOnlyRandomSamples()) || (aln_cfg_sample_mapq0_ >= 2 && best->isExtra()); const MapqType mapq = mapq0 ? 0 - : (INVALID_SCORE != xs_score_diff) - ? std::min(std::max(0, xs_heur_mapq), mapq_prod_pen) - : mapq_prod_pen; + : (INVALID_SCORE != xs_score_diff) + ? std::min(std::max(0, xs_heur_mapq), mapq_prod_pen) + : mapq_prod_pen; #ifdef TRACE_SCORING std::cerr << "[SCORING]\t" diff --git a/src/lib/align/SmithWaterman.cpp b/src/lib/align/SmithWaterman.cpp index e749c56..83a39ad 100644 --- a/src/lib/align/SmithWaterman.cpp +++ b/src/lib/align/SmithWaterman.cpp @@ -13,12 +13,13 @@ **/ #include "align/SmithWaterman.hpp" -#include "common/DragenLogger.hpp" #include #include #include +#include "common/DragenLogger.hpp" + namespace dragenos { namespace align { diff --git a/src/lib/align/VectorSmithWaterman.cpp b/src/lib/align/VectorSmithWaterman.cpp index 1548aeb..6c3a214 100644 --- a/src/lib/align/VectorSmithWaterman.cpp +++ b/src/lib/align/VectorSmithWaterman.cpp @@ -13,8 +13,10 @@ **/ #include "align/VectorSmithWaterman.hpp" + #include #include + #include "ssw/ssw.hpp" namespace dragenos { diff --git a/src/lib/align/Wavefront.cpp b/src/lib/align/Wavefront.cpp index ed45b63..67e4aec 100644 --- a/src/lib/align/Wavefront.cpp +++ b/src/lib/align/Wavefront.cpp @@ -13,6 +13,7 @@ **/ #include "align/Wavefront.hpp" + #include namespace dragenos { diff --git a/src/lib/align/tests/unit/CigarGtest.cpp b/src/lib/align/tests/unit/CigarGtest.cpp index 753acc4..ec8a4f0 100644 --- a/src/lib/align/tests/unit/CigarGtest.cpp +++ b/src/lib/align/tests/unit/CigarGtest.cpp @@ -1,14 +1,14 @@ -#include "gtest/gtest.h" - #include #include #include "align/Cigar.hpp" +#include "gtest/gtest.h" TEST(Cigar, OperationNames) { using dragenos::align::Cigar; - ASSERT_EQ((unsigned long)Cigar::SEQUENCE_MISMATCH + 1, std::extent::value); + ASSERT_EQ( + (unsigned long)Cigar::SEQUENCE_MISMATCH + 1, std::extent::value); ASSERT_EQ('M', Cigar::getOperationName(Cigar::ALIGNMENT_MATCH)); ASSERT_EQ('I', Cigar::getOperationName(Cigar::INSERT)); ASSERT_EQ('D', Cigar::getOperationName(Cigar::DELETE)); diff --git a/src/lib/align/tests/unit/WavefrontGtest.cpp b/src/lib/align/tests/unit/WavefrontGtest.cpp index a6edae0..94b805c 100644 --- a/src/lib/align/tests/unit/WavefrontGtest.cpp +++ b/src/lib/align/tests/unit/WavefrontGtest.cpp @@ -1,8 +1,7 @@ -#include "gtest/gtest.h" - #include #include "align/Wavefront.hpp" +#include "gtest/gtest.h" TEST(Wavefront, Constructor) { @@ -297,21 +296,22 @@ TEST(Wavefront, moveRight) constexpr short MISMATCH = -3; constexpr short gapInit = 5; constexpr short gapExtend = 3; - const Antidiagonal similarities{MISMATCH, - MISMATCH, - MISMATCH, - MISMATCH, - MISMATCH, - MISMATCH, - MISMATCH, - MISMATCH, - MISMATCH, - MISMATCH, - MISMATCH, - MISMATCH, - MISMATCH, - MISMATCH, - MISMATCH}; + const Antidiagonal similarities{ + MISMATCH, + MISMATCH, + MISMATCH, + MISMATCH, + MISMATCH, + MISMATCH, + MISMATCH, + MISMATCH, + MISMATCH, + MISMATCH, + MISMATCH, + MISMATCH, + MISMATCH, + MISMATCH, + MISMATCH}; // E is unshifted const Antidiagonal ae{ gapExtend - 1, gapExtend + 1, gapExtend + 2, gapExtend + 3, gapExtend + 4, 0, 0, 0, 0, 0, 0, 0, 0, 0}; @@ -354,21 +354,22 @@ TEST(Wavefront, moveDown) constexpr short MISMATCH = -3; constexpr short gapInit = 5; constexpr short gapExtend = 3; - const Antidiagonal similarities{MISMATCH, - MISMATCH, - MISMATCH, - MISMATCH, - MISMATCH, - MISMATCH, - MISMATCH, - MISMATCH, - MISMATCH, - MISMATCH, - MISMATCH, - MISMATCH, - MISMATCH, - MISMATCH, - MISMATCH}; + const Antidiagonal similarities{ + MISMATCH, + MISMATCH, + MISMATCH, + MISMATCH, + MISMATCH, + MISMATCH, + MISMATCH, + MISMATCH, + MISMATCH, + MISMATCH, + MISMATCH, + MISMATCH, + MISMATCH, + MISMATCH, + MISMATCH}; // E is shifted by one yo the right (nextE[i] = lastE[i-1]) const Antidiagonal ae{ gapExtend - 1, gapExtend + 1, gapExtend + 2, gapExtend + 3, gapExtend + 4, 0, 0, 0, 0, 0, 0, 0, 0, 0}; diff --git a/src/lib/bam/Tokenizer.cpp b/src/lib/bam/Tokenizer.cpp index 7e388b8..a6d25b7 100644 --- a/src/lib/bam/Tokenizer.cpp +++ b/src/lib/bam/Tokenizer.cpp @@ -13,6 +13,7 @@ **/ #include "bam/Tokenizer.hpp" + #include "common/Debug.hpp" namespace dragenos { diff --git a/src/lib/common/Debug.cpp b/src/lib/common/Debug.cpp index 6b380b7..651d3b2 100644 --- a/src/lib/common/Debug.cpp +++ b/src/lib/common/Debug.cpp @@ -13,6 +13,7 @@ **/ #include "common/Debug.hpp" + #include "common/SystemCompatibility.hpp" namespace dragenos { diff --git a/src/lib/common/Program.cpp b/src/lib/common/Program.cpp index de4bc8d..c0111fb 100644 --- a/src/lib/common/Program.cpp +++ b/src/lib/common/Program.cpp @@ -12,12 +12,11 @@ ** **/ -#include +#include "common/Program.hpp" #include #include - -#include "common/Program.hpp" +#include namespace dragenos { namespace common { diff --git a/src/lib/common/SystemCompatibility.cpp b/src/lib/common/SystemCompatibility.cpp index d194cef..44bb6b6 100644 --- a/src/lib/common/SystemCompatibility.cpp +++ b/src/lib/common/SystemCompatibility.cpp @@ -12,17 +12,17 @@ ** **/ -#include +#include "common/SystemCompatibility.hpp" -#include -#include +#include #include #include +#include +#include #include "common/Debug.hpp" #include "common/Exceptions.hpp" -#include "common/SystemCompatibility.hpp" // TODO: add a proper configuration system as needed #define HAVE_SIGNAL_H diff --git a/src/lib/fastq/Tokenizer.cpp b/src/lib/fastq/Tokenizer.cpp index f5a2eaa..118f050 100644 --- a/src/lib/fastq/Tokenizer.cpp +++ b/src/lib/fastq/Tokenizer.cpp @@ -13,6 +13,7 @@ **/ #include "fastq/Tokenizer.hpp" + #include "common/Debug.hpp" namespace dragenos { diff --git a/src/lib/fastq/tests/unit/TokenizerGtest.cpp b/src/lib/fastq/tests/unit/TokenizerGtest.cpp index a3cdfd8..38a4a56 100644 --- a/src/lib/fastq/tests/unit/TokenizerGtest.cpp +++ b/src/lib/fastq/tests/unit/TokenizerGtest.cpp @@ -1,10 +1,9 @@ -#include "gtest/gtest.h" - #include #include #include #include "fastq/Tokenizer.hpp" +#include "gtest/gtest.h" const std::string ONE_RECORD( "@NB551322:14:HFVLLBGX9:4:11401:24054:1050 2:N:0:CGGCTATG+CCGTCGCC\n" diff --git a/src/lib/io/tests/integration/Fastq2ReadTransformerGtest.cpp b/src/lib/io/tests/integration/Fastq2ReadTransformerGtest.cpp index 77466fb..cd66859 100644 --- a/src/lib/io/tests/integration/Fastq2ReadTransformerGtest.cpp +++ b/src/lib/io/tests/integration/Fastq2ReadTransformerGtest.cpp @@ -1,8 +1,7 @@ -#include "gtest/gtest.h" - #include #include "align/Aligner.hpp" +#include "gtest/gtest.h" #include "io/Fastq2ReadTransformer.hpp" const std::string THREE_RECORDS( diff --git a/src/lib/map/ChainBuilder.cpp b/src/lib/map/ChainBuilder.cpp index 242d97d..0d7895a 100644 --- a/src/lib/map/ChainBuilder.cpp +++ b/src/lib/map/ChainBuilder.cpp @@ -12,10 +12,11 @@ ** **/ +#include "map/ChainBuilder.hpp" + #include #include "common/DragenLogger.hpp" -#include "map/ChainBuilder.hpp" namespace dragenos { namespace map { diff --git a/src/lib/map/Mapper.cpp b/src/lib/map/Mapper.cpp index 6eaa2c5..86790fc 100644 --- a/src/lib/map/Mapper.cpp +++ b/src/lib/map/Mapper.cpp @@ -27,23 +27,24 @@ namespace dragenos { namespace map { // debug variables -const char* typeString[17] = {"EMPTY", - "HIFREQ", - "EXTEND", - "REPAIR", // obsolete - "CHAIN_BEG_MASK", - "CHAIN_BEG_LIST", - "CHAIN_CON_MASK", - "CHAIN_CON_LIST", - "INTERVAL_SL", - "INTERVAL_SLE", - "INTERVAL_S", - "INTERVAL_L", - "", - "", - "", - "", - "HIT"}; +const char* typeString[17] = { + "EMPTY", + "HIFREQ", + "EXTEND", + "REPAIR", // obsolete + "CHAIN_BEG_MASK", + "CHAIN_BEG_LIST", + "CHAIN_CON_MASK", + "CHAIN_CON_LIST", + "INTERVAL_SL", + "INTERVAL_SLE", + "INTERVAL_S", + "INTERVAL_L", + "", + "", + "", + "", + "HIT"}; // end of debug variables #ifdef TRACE_SEED_CHAINS static uint32_t seedOffset = 0; diff --git a/src/lib/map/tests/integration/ChainBuilderGtest.cpp b/src/lib/map/tests/integration/ChainBuilderGtest.cpp index 71c44cd..e2b56ef 100644 --- a/src/lib/map/tests/integration/ChainBuilderGtest.cpp +++ b/src/lib/map/tests/integration/ChainBuilderGtest.cpp @@ -1,5 +1,4 @@ #include "gtest/gtest.h" - #include "map/ChainBuilder.hpp" TEST(ChainBuilder, Constructor) diff --git a/src/lib/map/tests/integration/SeedChainGtest.cpp b/src/lib/map/tests/integration/SeedChainGtest.cpp index aef9ff8..9332e36 100644 --- a/src/lib/map/tests/integration/SeedChainGtest.cpp +++ b/src/lib/map/tests/integration/SeedChainGtest.cpp @@ -1,7 +1,6 @@ -#include "gtest/gtest.h" - #include +#include "gtest/gtest.h" #include "map/SeedChain.hpp" using dragenos::map::SeedChain; diff --git a/src/lib/map/tests/unit/BestIntervalTrackerGtest.cpp b/src/lib/map/tests/unit/BestIntervalTrackerGtest.cpp index 1514f63..1995d54 100644 --- a/src/lib/map/tests/unit/BestIntervalTrackerGtest.cpp +++ b/src/lib/map/tests/unit/BestIntervalTrackerGtest.cpp @@ -1,6 +1,5 @@ -#include "gtest/gtest.h" - #include "SeedPositionMocks.hpp" +#include "gtest/gtest.h" #include "map/BestIntervalTracker.hpp" using dragenos::map::BestIntervalTracker; diff --git a/src/lib/map/tests/unit/SeedPositionGtest.cpp b/src/lib/map/tests/unit/SeedPositionGtest.cpp index ff4e226..1412223 100644 --- a/src/lib/map/tests/unit/SeedPositionGtest.cpp +++ b/src/lib/map/tests/unit/SeedPositionGtest.cpp @@ -1,7 +1,5 @@ -#include "gtest/gtest.h" - #include "SeedPositionMocks.hpp" - +#include "gtest/gtest.h" #include "map/SeedPosition.hpp" using dragenos::map::SeedPosition; diff --git a/src/lib/options/DragenOsOptions.cpp b/src/lib/options/DragenOsOptions.cpp index 41e32c2..049c9c2 100644 --- a/src/lib/options/DragenOsOptions.cpp +++ b/src/lib/options/DragenOsOptions.cpp @@ -12,13 +12,9 @@ ** **/ -#include -#include -#include -#include -#include -#include +#include "options/DragenOsOptions.hpp" +#include #include #include #include @@ -27,10 +23,14 @@ #include #include #include +#include +#include +#include +#include +#include #include "common/Exceptions.hpp" #include "common/Version.hpp" -#include "options/DragenOsOptions.hpp" namespace dragenos { namespace options { diff --git a/src/lib/reference/ExtendTableInterval.cpp b/src/lib/reference/ExtendTableInterval.cpp index eb53ed1..ea4edd2 100644 --- a/src/lib/reference/ExtendTableInterval.cpp +++ b/src/lib/reference/ExtendTableInterval.cpp @@ -30,8 +30,7 @@ ExtendTableInterval::ExtendTableInterval(I begin, I end) } // explicit instantiation for vectors and pointers -template ExtendTableInterval::ExtendTableInterval( - const HashRecord* begin, const HashRecord* end); +template ExtendTableInterval::ExtendTableInterval(const HashRecord* begin, const HashRecord* end); template ExtendTableInterval::ExtendTableInterval( std::vector::const_iterator begin, std::vector::const_iterator end); template ExtendTableInterval::ExtendTableInterval( diff --git a/src/lib/reference/HashtableConfig.cpp b/src/lib/reference/HashtableConfig.cpp index ce8a9d3..2e3d4c6 100644 --- a/src/lib/reference/HashtableConfig.cpp +++ b/src/lib/reference/HashtableConfig.cpp @@ -13,6 +13,7 @@ **/ #include "reference/HashtableConfig.hpp" + #include #include #include diff --git a/src/lib/reference/ReferenceDir.cpp b/src/lib/reference/ReferenceDir.cpp index 669c345..e676928 100644 --- a/src/lib/reference/ReferenceDir.cpp +++ b/src/lib/reference/ReferenceDir.cpp @@ -12,16 +12,17 @@ ** **/ +#include "reference/ReferenceDir.hpp" + #include #include + +#include #include #include #include -#include - #include "common/hash_generation/hash_table_compress.h" -#include "reference/ReferenceDir.hpp" namespace dragenos { namespace reference { @@ -52,7 +53,7 @@ typename ReferenceDir7::UcharPtr ReferenceDir7::ReadFileIntoBuffer( size = file.tellg(); file.seekg(0, file.beg); - UcharPtr bufPtr(new uint8_t[size], [](uint8_t* p) -> void { delete [](p); }); + UcharPtr bufPtr(new uint8_t[size], [](uint8_t* p) -> void { delete[](p); }); file.read(reinterpret_cast(bufPtr.get()), size); if (!file) { // THROW(DragenException, "Could not load reference - could not read ", path); @@ -118,8 +119,7 @@ ReferenceDir7::ReferenceDir7(const boost::filesystem::path& path, bool mmap, boo // restore stdout dup2(stdoutori, 1); - hashtableData_ = - Uint64Ptr(reinterpret_cast(hashbuf), [](uint64_t* p) -> void { free(p); }); + hashtableData_ = Uint64Ptr(reinterpret_cast(hashbuf), [](uint64_t* p) -> void { free(p); }); extendTableData_ = Uint64Ptr(reinterpret_cast(extendTableBuf), [](uint64_t* p) -> void { free(p); }); } diff --git a/src/lib/reference/tests/integration/ExtendTableIntervalGtest.cpp b/src/lib/reference/tests/integration/ExtendTableIntervalGtest.cpp index 79e5052..8359a28 100644 --- a/src/lib/reference/tests/integration/ExtendTableIntervalGtest.cpp +++ b/src/lib/reference/tests/integration/ExtendTableIntervalGtest.cpp @@ -1,5 +1,4 @@ #include "gtest/gtest.h" - #include "reference/ExtendTableInterval.hpp" TEST(ExtendTableInterval, SetOfOne) @@ -42,7 +41,7 @@ TEST(ExtendTableInterval, SetOfTwo) uint64_t s4[] = {0x7bf9df58fa123456, 0x7bf9fedcfb987654}; uint64_t* ss[] = {s1, s2, s3, s4}; uint32_t esl[][3] = { - {0, 0x56987654, 0x1234}, {0x12, 0x56987654, 0x34}, {0, 0x56, 0x34987654}, {0, 0x123456, 0x987654}}; + {0, 0x56987654, 0x1234}, {0x12, 0x56987654, 0x34}, {0, 0x56, 0x34987654}, {0, 0x123456, 0x987654}}; for (unsigned i = 0; 4 > i; ++i) { const auto begin = reinterpret_cast(ss[i]); const auto end = begin + 2; @@ -95,7 +94,7 @@ TEST(ExtendTableInterval, SetOfThree) uint64_t s5[] = {0xc851b149f900001a, 0xc851b148fabb10ae, 0xc851b14afb01f3b0}; uint64_t* ss[] = {s1, s2, s3, s4, s5}; uint32_t sl[][2] = { - {5777464, 92356}, {4231852, 80514}, {0x27c4fc2, 106658}, {0x1b46391e, 96576}, {0x1abb10ae, 127920}}; + {5777464, 92356}, {4231852, 80514}, {0x27c4fc2, 106658}, {0x1b46391e, 96576}, {0x1abb10ae, 127920}}; for (unsigned i = 0; 5 > i; ++i) { const auto begin = reinterpret_cast(ss[i]); const auto end = begin + 3; diff --git a/src/lib/reference/tests/integration/HashtableGtest.cpp b/src/lib/reference/tests/integration/HashtableGtest.cpp index b3e3942..1f9dce3 100644 --- a/src/lib/reference/tests/integration/HashtableGtest.cpp +++ b/src/lib/reference/tests/integration/HashtableGtest.cpp @@ -1,5 +1,4 @@ #include "gtest/gtest.h" - #include "reference/Hashtable.hpp" /** diff --git a/src/lib/reference/tests/unit/HashRecordGtest.cpp b/src/lib/reference/tests/unit/HashRecordGtest.cpp index a2efa1a..a3ead21 100644 --- a/src/lib/reference/tests/unit/HashRecordGtest.cpp +++ b/src/lib/reference/tests/unit/HashRecordGtest.cpp @@ -1,5 +1,4 @@ #include "gtest/gtest.h" - #include "reference/HashRecord.hpp" TEST(HashRecord, Constructor) {} diff --git a/src/lib/reference/tests/unit/HashtableConfigGtest.cpp b/src/lib/reference/tests/unit/HashtableConfigGtest.cpp index 24b08d4..b94b81a 100644 --- a/src/lib/reference/tests/unit/HashtableConfigGtest.cpp +++ b/src/lib/reference/tests/unit/HashtableConfigGtest.cpp @@ -1,7 +1,6 @@ //#include "HashtableConfigMocks.hpp" #include "gtest/gtest.h" - #include "reference/HashtableConfig.hpp" TEST(HashtableConfig, Constructor) {} diff --git a/src/lib/reference/tests/unit/ReferenceSequenceGtest.cpp b/src/lib/reference/tests/unit/ReferenceSequenceGtest.cpp index 2105696..bd2a12c 100644 --- a/src/lib/reference/tests/unit/ReferenceSequenceGtest.cpp +++ b/src/lib/reference/tests/unit/ReferenceSequenceGtest.cpp @@ -1,8 +1,7 @@ -#include "gtest/gtest.h" - #include #include +#include "gtest/gtest.h" #include "reference/ReferenceSequence.hpp" static std::string bases = "ACGTAACCGGTTAAACCCGGGTTT"; diff --git a/src/lib/sequences/CrcHasher.cpp b/src/lib/sequences/CrcHasher.cpp index 4d0a4cb..3455f68 100644 --- a/src/lib/sequences/CrcHasher.cpp +++ b/src/lib/sequences/CrcHasher.cpp @@ -12,13 +12,14 @@ ** **/ +#include "sequences/CrcHasher.hpp" + #include #include + #include #include -#include "sequences/CrcHasher.hpp" - namespace dragenos { namespace sequences { diff --git a/src/lib/sequences/Read.cpp b/src/lib/sequences/Read.cpp index 97b1aca..62a7bd7 100644 --- a/src/lib/sequences/Read.cpp +++ b/src/lib/sequences/Read.cpp @@ -12,6 +12,8 @@ ** **/ +#include "sequences/Read.hpp" + #include #include @@ -19,7 +21,6 @@ #include #include "common/Exceptions.hpp" -#include "sequences/Read.hpp" namespace dragenos { namespace sequences { diff --git a/src/lib/sequences/tests/unit/CrcHasherGtest.cpp b/src/lib/sequences/tests/unit/CrcHasherGtest.cpp index 3e5a67e..bda09c8 100644 --- a/src/lib/sequences/tests/unit/CrcHasherGtest.cpp +++ b/src/lib/sequences/tests/unit/CrcHasherGtest.cpp @@ -1,7 +1,7 @@ +#include + #include "CrcHasherMocks.hpp" #include "gtest/gtest.h" - -#include #include "sequences/CrcHasher.hpp" TEST(CrcHasher, KnownHashValues) diff --git a/src/lib/sequences/tests/unit/CrcPolynomialGtest.cpp b/src/lib/sequences/tests/unit/CrcPolynomialGtest.cpp index 903d589..e386386 100644 --- a/src/lib/sequences/tests/unit/CrcPolynomialGtest.cpp +++ b/src/lib/sequences/tests/unit/CrcPolynomialGtest.cpp @@ -1,5 +1,4 @@ #include "gtest/gtest.h" - #include "sequences/CrcPolynomial.hpp" typedef dragenos::sequences::CrcPolynomial crc_polynomial_t; diff --git a/src/lib/sequences/tests/unit/SeedGtest.cpp b/src/lib/sequences/tests/unit/SeedGtest.cpp index 7e385cf..ee29743 100644 --- a/src/lib/sequences/tests/unit/SeedGtest.cpp +++ b/src/lib/sequences/tests/unit/SeedGtest.cpp @@ -1,11 +1,9 @@ -#include "SeedMocks.hpp" - -#include "gtest/gtest.h" - #include #include +#include "SeedMocks.hpp" #include "common/Exceptions.hpp" +#include "gtest/gtest.h" #include "sequences/Seed.hpp" namespace dragenos { @@ -135,7 +133,8 @@ TEST(SeedTest, getPrimaryData) ASSERT_EQ(151u, read.getLength()); for (unsigned i = 0; read.getLength() > i; ++i) { ASSERT_EQ((unsigned long)read.getBase2bpb(i), Seed(&read, i, 1).getPrimaryData(false)) << "i: " << i; - ASSERT_EQ((unsigned long)(~read.getBase2bpb(i)) & 3, Seed(&read, i, 1).getPrimaryData(true)) << "i: " << i; + ASSERT_EQ((unsigned long)(~read.getBase2bpb(i)) & 3, Seed(&read, i, 1).getPrimaryData(true)) + << "i: " << i; } ASSERT_THROW( Seed(&read, read.getLength(), 1).getPrimaryData(false), dragenos::common::PreConditionException); diff --git a/src/lib/workflow/DualFastq2SamWorkflow.cpp b/src/lib/workflow/DualFastq2SamWorkflow.cpp index 8b5d8de..fcbb14e 100644 --- a/src/lib/workflow/DualFastq2SamWorkflow.cpp +++ b/src/lib/workflow/DualFastq2SamWorkflow.cpp @@ -12,26 +12,22 @@ ** **/ +#include #include #include - -#include // #include #include #include -#include "common/Debug.hpp" -#include "common/Threads.hpp" -#include "mapping_stats.hpp" - #include "align/Aligner.hpp" #include "align/SinglePicker.hpp" +#include "common/Debug.hpp" +#include "common/Threads.hpp" #include "fastq/Tokenizer.hpp" #include "io/Fastq2ReadTransformer.hpp" +#include "mapping_stats.hpp" #include "sam/SamGenerator.hpp" - #include "workflow/DualFastq2SamWorkflow.hpp" - #include "workflow/alignment/AlignmentUtils.hpp" namespace dragenos { @@ -305,11 +301,11 @@ void DualFastq2SamWorkflow::alignDualFastqBlock( { common::unlock_guard unlock(lock); boost::iostreams::filtering_istream inputR1; - inputR1.push(boost::iostreams::basic_array_source{&r1Block.front(), - &r1Block.front() + r1Block.size()}); + inputR1.push(boost::iostreams::basic_array_source{ + &r1Block.front(), &r1Block.front() + r1Block.size()}); boost::iostreams::filtering_istream inputR2; - inputR2.push(boost::iostreams::basic_array_source{&r2Block.front(), - &r2Block.front() + r2Block.size()}); + inputR2.push(boost::iostreams::basic_array_source{ + &r2Block.front(), &r2Block.front() + r2Block.size()}); insertSizeParameters = requestInsertSizeInfo(insertSizeDistribution, inputR1, inputR2); } assert(blockToGetInsertSizes_ == ourBlock); @@ -325,11 +321,11 @@ void DualFastq2SamWorkflow::alignDualFastqBlock( { common::unlock_guard unlock(lock); boost::iostreams::filtering_istream inputR1; - inputR1.push(boost::iostreams::basic_array_source{&r1Block.front(), - &r1Block.front() + r1Block.size()}); + inputR1.push(boost::iostreams::basic_array_source{ + &r1Block.front(), &r1Block.front() + r1Block.size()}); boost::iostreams::filtering_istream inputR2; - inputR2.push(boost::iostreams::basic_array_source{&r2Block.front(), - &r2Block.front() + r2Block.size()}); + inputR2.push(boost::iostreams::basic_array_source{ + &r2Block.front(), &r2Block.front() + r2Block.size()}); insBuffer.clear(); tmpBuffer.clear(); diff --git a/src/lib/workflow/GenHashTableWorkflow.cpp b/src/lib/workflow/GenHashTableWorkflow.cpp index dc78175..4c94a5c 100644 --- a/src/lib/workflow/GenHashTableWorkflow.cpp +++ b/src/lib/workflow/GenHashTableWorkflow.cpp @@ -28,9 +28,8 @@ // #include "common/hash_generation/gen_hash_table.h" #include "common/hash_generation/hash_table_compress.h" -#include "workflow/GenHashTableWorkflow.hpp" - #include "common/public/linux_utils.hpp" +#include "workflow/GenHashTableWorkflow.hpp" namespace dragenos { namespace workflow { diff --git a/src/lib/workflow/Input2SamWorkflow.cpp b/src/lib/workflow/Input2SamWorkflow.cpp index 5f5ab9e..238ed2b 100644 --- a/src/lib/workflow/Input2SamWorkflow.cpp +++ b/src/lib/workflow/Input2SamWorkflow.cpp @@ -12,21 +12,21 @@ ** **/ -#include -#include -#include -#include - -#include "boost/iostreams/filter/gzip.hpp" +#include "workflow/Input2SamWorkflow.hpp" #include #include #include +#include +#include +#include +#include #include "align/Aligner.hpp" #include "align/SinglePicker.hpp" #include "bam/BamBlockReader.hpp" #include "bam/Tokenizer.hpp" +#include "boost/iostreams/filter/gzip.hpp" #include "common/Debug.hpp" #include "common/Threads.hpp" #include "fastq/FastqBlockReader.hpp" @@ -37,10 +37,7 @@ #include "options/DragenOsOptions.hpp" #include "reference/ReferenceDir.hpp" #include "sam/SamGenerator.hpp" - #include "workflow/DualFastq2SamWorkflow.hpp" -#include "workflow/Input2SamWorkflow.hpp" - #include "workflow/alignment/AlignmentUtils.hpp" namespace dragenos { diff --git a/tests/ExtendTableGtest.cpp b/tests/ExtendTableGtest.cpp index 33daa90..b0e7b9c 100644 --- a/tests/ExtendTableGtest.cpp +++ b/tests/ExtendTableGtest.cpp @@ -1,40 +1,38 @@ #include "gtest/gtest.h" +#include +#include #include +#include +#include #include +#include #include #include -#include -#include -#include -#include -#include #include -#include -#include -#include #include +#include +#include +#include #include "common/Bits.hpp" -#include "sequences/Read.hpp" -#include "sequences/Seed.hpp" +#include "common/Exceptions.hpp" #include "reference/Hashtable.hpp" #include "reference/ReferenceSequence.hpp" -#include "common/Exceptions.hpp" +#include "sequences/Read.hpp" +#include "sequences/Seed.hpp" -class ExtendTableFixture: public ::testing::Test -{ +class ExtendTableFixture : public ::testing::Test { public: - ExtendTableFixture() - { - } + ExtendTableFixture() {} static void SetUpTestCase(); - static void TearDownTestCase() - { - } - void showInterval(const size_t start, const size_t length, const unsigned seedLength = 21, const unsigned halfWing = 0) const; + static void TearDownTestCase() {} + void showInterval(const size_t start, const size_t length, + const unsigned seedLength = 21, + const unsigned halfWing = 0) const; + protected: typedef dragenos::reference::HashtableConfig HashtableConfig; typedef dragenos::reference::HashtableTraits HashtableTraits; @@ -45,187 +43,190 @@ class ExtendTableFixture: public ::testing::Test static ReferenceSequence referenceSequence; }; - class Environment : public ::testing::Environment { public: typedef dragenos::reference::HashtableConfig HashtableConfig; typedef dragenos::reference::HashtableTraits HashtableTraits; typedef dragenos::reference::Hashtable Hashtable; typedef dragenos::reference::ReferenceSequence ReferenceSequence; - Environment() : hashtableFd(-1), extendTableFd(-1), table(nullptr), extendTable(nullptr) - { - } - virtual ~Environment() - { - } - static unsigned char *getReferenceData() {return referenceData;} - static size_t getReferenceSize() {return referenceFileSize;} - static Hashtable *getHashtable() {return hashtable.get();} - static HashtableConfig *getHashtableConfig() {return hashtableConfig.get();} + Environment() + : hashtableFd(-1), extendTableFd(-1), table(nullptr), + extendTable(nullptr) {} + virtual ~Environment() {} + static unsigned char *getReferenceData() { return referenceData; } + static size_t getReferenceSize() { return referenceFileSize; } + static Hashtable *getHashtable() { return hashtable.get(); } + static HashtableConfig *getHashtableConfig() { return hashtableConfig.get(); } // Override this to define how to set up the environment. void SetUp() override; // Override this to define how to tear down the environment. void TearDown() override; static const std::vector sequence; static const std::vector qualities; + private: static std::unique_ptr hashtableConfig; static std::unique_ptr hashtable; int hashtableFd; int extendTableFd; + public: - uint64_t* table; - uint64_t* extendTable; + uint64_t *table; + uint64_t *extendTable; + private: int referenceFd; static size_t referenceFileSize; static unsigned char *referenceData; }; -Environment* globalTestEnvironment = nullptr; +Environment *globalTestEnvironment = nullptr; -std::string intervalToString(const dragenos::reference::HashRecord hashRecord) -{ +std::string intervalToString(const dragenos::reference::HashRecord hashRecord) { std::ostringstream os; using dragenos::reference::HashRecord; const auto type = hashRecord.getType(); const auto value = hashRecord.getValue(); - switch (type) - { - case HashRecord::INTERVAL_SL: - os << "SL"; - if (!hashRecord.isReverseComplement()) - { - os << "0"; - os << " length: " << ((value >> 15) & 0x1ffUL); - os << " start: " << (value & 0x7fffUL); - } - else - { - os << "1"; - os << " length: " << ((value >> 8) & 0xffffUL); - os << " start: " << (value & 0xffUL); - } - break; - case HashRecord::INTERVAL_SLE: - os << "SLE"; - os << " exlift: " << ((value >> 16) & 0xffUL); - os << " length: " << ((value >> 8) & 0xffUL); + switch (type) { + case HashRecord::INTERVAL_SL: + os << "SL"; + if (!hashRecord.isReverseComplement()) { + os << "0"; + os << " length: " << ((value >> 15) & 0x1ffUL); + os << " start: " << (value & 0x7fffUL); + } else { + os << "1"; + os << " length: " << ((value >> 8) & 0xffffUL); os << " start: " << (value & 0xffUL); - break; - case HashRecord::INTERVAL_S: - os << "S "; - os << " start: " << (value & 0xffffffUL); - break; - case HashRecord::INTERVAL_L: - os << "L "; - os << " length: " << (value & 0xffffffUL); - break; - default: os << "unknown"; break; + } + break; + case HashRecord::INTERVAL_SLE: + os << "SLE"; + os << " exlift: " << ((value >> 16) & 0xffUL); + os << " length: " << ((value >> 8) & 0xffUL); + os << " start: " << (value & 0xffUL); + break; + case HashRecord::INTERVAL_S: + os << "S "; + os << " start: " << (value & 0xffffffUL); + break; + case HashRecord::INTERVAL_L: + os << "L "; + os << " length: " << (value & 0xffffffUL); + break; + default: + os << "unknown"; + break; } return os.str(); } -TEST_F(ExtendTableFixture, DISABLED_FindExtendTableIntervals) -{ - using dragenos::reference::HashRecord; +TEST_F(ExtendTableFixture, DISABLED_FindExtendTableIntervals) { using dragenos::reference::Bucket; + using dragenos::reference::HashRecord; - const Bucket * const buckets = reinterpret_cast(globalTestEnvironment->table); + const Bucket *const buckets = + reinterpret_cast(globalTestEnvironment->table); const size_t bucketCount = hashtableConfig->getHashtableBucketCount(); size_t count = 0; - for (size_t bucketId = 0; bucketCount > bucketId; ++ bucketId) - { - //bool inSet = false; - //const auto begin = reinterpret_cast(buckets + bucketId); - //const auto end = begin + 8; + for (size_t bucketId = 0; bucketCount > bucketId; ++bucketId) { + // bool inSet = false; + // const auto begin = reinterpret_cast(buckets + + // bucketId); const auto end = begin + 8; bool hasS = false; bool hasL = false; - for (const auto &hashRecord: buckets[bucketId]) - { + for (const auto &hashRecord : buckets[bucketId]) { const auto type = hashRecord.getType(); - if ((HashRecord::CHAIN_CON_MASK == type) || (HashRecord::CHAIN_CON_LIST == type)) - { + if ((HashRecord::CHAIN_CON_MASK == type) || + (HashRecord::CHAIN_CON_LIST == type)) { break; } - //if ((HashRecord::INTERVAL_SL == type) || (HashRecord::INTERVAL_SLE == type) ||(HashRecord::INTERVAL_S == type) ||(HashRecord::INTERVAL_L == type)) + // if ((HashRecord::INTERVAL_SL == type) || (HashRecord::INTERVAL_SLE == + // type) ||(HashRecord::INTERVAL_S == type) ||(HashRecord::INTERVAL_L == + // type)) //{ - // if (!inSet) std::cerr << " Bucket id: " << bucketId << std::endl; - // inSet = true; - // std::cerr << " record: " << std::hex << std::setfill('0') - // << std::setw(8) << (hashRecord.getValue() >> 32) << ":" << (hashRecord.getValue() & 0xffffffff) - // << ": " << intervalToString(hashRecord) - // << std::setfill(' ') << std::dec << std::endl; - //} + // if (!inSet) std::cerr << " Bucket id: " << bucketId << std::endl; + // inSet = true; + // std::cerr << " record: " << std::hex << std::setfill('0') + // << std::setw(8) << (hashRecord.getValue() >> 32) << ":" << + // (hashRecord.getValue() & 0xffffffff) + // << ": " << intervalToString(hashRecord) + // << std::setfill(' ') << std::dec << std::endl; + // } hasS |= (HashRecord::INTERVAL_S == type); hasL |= (HashRecord::INTERVAL_L == type); } - if (hasS && hasL) - { + if (hasS && hasL) { std::cerr << " Bucket id: " << bucketId << ":" << std::endl; - for (const auto &hashRecord: buckets[bucketId]) - { + for (const auto &hashRecord : buckets[bucketId]) { const auto type = hashRecord.getType(); const auto value = hashRecord.getValue(); std::cerr << " record: " << std::hex << std::setfill('0') - << std::setw(8) << (value >> 32) << ":" << (value & 0xffffffff); - if ((HashRecord::INTERVAL_SL == type) || (HashRecord::INTERVAL_SLE == type) ||(HashRecord::INTERVAL_S == type) ||(HashRecord::INTERVAL_L == type)) - { + << std::setw(8) << (value >> 32) << ":" + << (value & 0xffffffff); + if ((HashRecord::INTERVAL_SL == type) || + (HashRecord::INTERVAL_SLE == type) || + (HashRecord::INTERVAL_S == type) || + (HashRecord::INTERVAL_L == type)) { std::cerr << ": " << intervalToString(hashRecord); } std::cerr << std::setfill(' ') << std::dec << std::endl; } ++count; } - //count += inSet; - //if (count > 100) + // count += inSet; + // if (count > 100) //{ - // break; - //} + // break; + // } } } -unsigned char getBase(const unsigned char *referenceData, size_t position) -{ +unsigned char getBase(const unsigned char *referenceData, size_t position) { const auto value = referenceData[position / 2]; return (position % 2) ? (value >> 4) : (value & 0xF); } -void ExtendTableFixture::showInterval(const size_t start, const size_t length, const unsigned seedLength, const unsigned halfWing) const -{ - const uint64_t * const extendTable = globalTestEnvironment->extendTable; - const unsigned char *referenceData = globalTestEnvironment->getReferenceData(); +void ExtendTableFixture::showInterval(const size_t start, const size_t length, + const unsigned seedLength, + const unsigned halfWing) const { + const uint64_t *const extendTable = globalTestEnvironment->extendTable; + const unsigned char *referenceData = + globalTestEnvironment->getReferenceData(); std::cerr << "start: " << start << " length: " << length << std::endl; - for (size_t i = 0; 12 > i; ++i) - { - if (2 == i) std::cerr << std::endl; + for (size_t i = 0; 12 > i; ++i) { + if (2 == i) + std::cerr << std::endl; const uint64_t extendRecord = extendTable[start + i - 2]; const size_t position = (extendRecord & 0xffffffff); - std::cerr << std::hex << std::setfill('0') << std::setw(8) << (extendRecord >> 32) << ":" << std::setw(8) << position << ":"; - for (unsigned j = 0; j < seedLength; ++j) std::cerr << " " << (unsigned)getBase(referenceData, position + j); + std::cerr << std::hex << std::setfill('0') << std::setw(8) + << (extendRecord >> 32) << ":" << std::setw(8) << position << ":"; + for (unsigned j = 0; j < seedLength; ++j) + std::cerr << " " << (unsigned)getBase(referenceData, position + j); std::cerr << std::setfill(' ') << std::dec << std::endl; } - std::cerr << std::endl; - for (size_t i = 0; 12 > i; ++i) - { - if (10 == i) std::cerr << std::endl; + std::cerr << std::endl; + for (size_t i = 0; 12 > i; ++i) { + if (10 == i) + std::cerr << std::endl; const uint64_t extendRecord = extendTable[start + length - 10 + i]; const size_t position = (extendRecord & 0xffffffff); - std::cerr << std::hex << std::setfill('0') << std::setw(8) << (extendRecord >> 32) << ":" << std::setw(8) << position << ":"; - for (unsigned j = 0; j < seedLength; ++j) std::cerr << " " << (unsigned)getBase(referenceData, position + j); + std::cerr << std::hex << std::setfill('0') << std::setw(8) + << (extendRecord >> 32) << ":" << std::setw(8) << position << ":"; + for (unsigned j = 0; j < seedLength; ++j) + std::cerr << " " << (unsigned)getBase(referenceData, position + j); std::cerr << std::setfill(' ') << std::dec << std::endl; } } -TEST_F(ExtendTableFixture, ExploreExtendTable) -{ +TEST_F(ExtendTableFixture, ExploreExtendTable) { // First example: EXTEND followed by and interval: // record: 7bf9df58:f2300726 // record: 7bf9df59:f9000000: SLE exlift: 0 length: 0 start: 0 // record: 7bf9df58:fa582838: S start: 5777464 // record: 7bf9df5a:fb0168c4: L length: 92356 // - // Second example: + // Second example: // record: 044d5968:f2280559 // record: 044d5969:f9000000: SLE exlift: 0 length: 0 start: 0 // record: 044d5968:fa4092ac: S start: 4231852 @@ -250,125 +251,141 @@ TEST_F(ExtendTableFixture, ExploreExtendTable) // record: c851b14a:fb01f3b0: L length: 127920 // // First: - //const size_t start = 5777464; - //const size_t length = 92356; + // const size_t start = 5777464; + // const size_t length = 92356; // Second: - //const size_t start = 4231852; - //const size_t length = 80514; + // const size_t start = 4231852; + // const size_t length = 80514; // Third: - //const size_t start = 0x27c4fc2; // both MSB on SLE and Carry on S - //const size_t length = 106658; + // const size_t start = 0x27c4fc2; // both MSB on SLE and Carry on S + // const size_t length = 106658; // Fourth: - //const size_t start = 0x1b46391e; // both MSB on SLE and Carry on S - //const size_t length = 96576; + // const size_t start = 0x1b46391e; // both MSB on SLE and Carry on S + // const size_t length = 96576; // Fifth: const size_t start = 0x1abb10ae; // MSB but no Carry const size_t length = 127920; showInterval(start, length); } -TEST_F(ExtendTableFixture, CheckExtendInterval) -{ - const size_t start = 257097262; +TEST_F(ExtendTableFixture, CheckExtendInterval) { + const size_t start = 257097262; const size_t length = 24; showInterval(start, length); } -std::unique_ptr Environment::hashtableConfig(nullptr); +std::unique_ptr + Environment::hashtableConfig(nullptr); std::unique_ptr Environment::hashtable(nullptr); unsigned char *Environment::referenceData = nullptr; size_t Environment::referenceFileSize = 0; -std::vector slurp(const boost::filesystem::path &filePath) -{ +std::vector slurp(const boost::filesystem::path &filePath) { const ssize_t fileSize = file_size(filePath); - if (0 == fileSize) - { + if (0 == fileSize) { return std::vector(); } std::vector ret; ret.resize(fileSize); std::ifstream is(filePath.string()); - if (is && is.read(ret.data(), fileSize) && (fileSize == is.gcount())) - { + if (is && is.read(ret.data(), fileSize) && (fileSize == is.gcount())) { return ret; } - BOOST_THROW_EXCEPTION(dragenos::common::IoException(errno, "Failed to read file")); + BOOST_THROW_EXCEPTION( + dragenos::common::IoException(errno, "Failed to read file")); } -void Environment::SetUp() -{ +void Environment::SetUp() { const auto argv = testing::internal::GetArgvs(); namespace bfs = boost::filesystem; - ASSERT_TRUE(argv.size() > 1 || (nullptr != getenv("REFDIR"))) << "Checking for reference-directory on the command line or in the environment variable REFDIR"; + ASSERT_TRUE(argv.size() > 1 || (nullptr != getenv("REFDIR"))) + << "Checking for reference-directory on the command line or in the " + "environment variable REFDIR"; const bfs::path referenceDir(argv.size() > 1 ? argv[1] : getenv("REFDIR")); - std::cerr << "\n" << argv[0] << ": using rederence directory: " << referenceDir << "\n" << std::endl; - ASSERT_TRUE(exists(referenceDir)) << "checking the existence of the reference-directory: " << referenceDir; + std::cerr << "\n" + << argv[0] << ": using rederence directory: " << referenceDir + << "\n" + << std::endl; + ASSERT_TRUE(exists(referenceDir)) + << "checking the existence of the reference-directory: " << referenceDir; const bfs::path hashtableConfigFile = referenceDir / "hash_table.cfg.bin"; - ASSERT_TRUE(exists(hashtableConfigFile)) << "checking the existence of the hashtable config: " << hashtableConfigFile; + ASSERT_TRUE(exists(hashtableConfigFile)) + << "checking the existence of the hashtable config: " + << hashtableConfigFile; const std::vector config = slurp(hashtableConfigFile); hashtableConfig.reset(new HashtableConfig(config.data(), config.size())); const bfs::path hashtableFile = referenceDir / "hash_table.bin"; const bfs::path extendTableFile = referenceDir / "extend_table.bin"; - ASSERT_TRUE(exists(hashtableFile)) << "checking the existence of the uncompressed hashtable: " << hashtableFile; + ASSERT_TRUE(exists(hashtableFile)) + << "checking the existence of the uncompressed hashtable: " + << hashtableFile; hashtableFd = open(hashtableFile.c_str(), O_RDONLY, 0); - ASSERT_LT(-1, hashtableFd) << "failed to open hashtable: " << hashtableFile << ": " << strerror(errno); + ASSERT_LT(-1, hashtableFd) << "failed to open hashtable: " << hashtableFile + << ": " << strerror(errno); const auto tableSize = boost::filesystem::file_size(hashtableFile); ASSERT_EQ(tableSize, hashtableConfig->getHashtableBytes()); const int prot = PROT_READ; const int flags = MAP_PRIVATE | MAP_NORESERVE; const int offset = 0; - table = static_cast (mmap(NULL, tableSize, prot, flags, hashtableFd, offset)); - ASSERT_NE(MAP_FAILED, table) << "failed to map hashtable file: " << strerror(errno); - if (8 <= hashtableConfig.get()->getHashtableVersion()) - { + table = static_cast( + mmap(NULL, tableSize, prot, flags, hashtableFd, offset)); + ASSERT_NE(MAP_FAILED, table) + << "failed to map hashtable file: " << strerror(errno); + if (8 <= hashtableConfig.get()->getHashtableVersion()) { extendTableFd = open(extendTableFile.c_str(), O_RDONLY, 0); - ASSERT_LT(-1, extendTableFd) << "failed to open extendTable: " << extendTableFile << ": " << strerror(errno); + ASSERT_LT(-1, extendTableFd) + << "failed to open extendTable: " << extendTableFile << ": " + << strerror(errno); const auto extendTableSize = boost::filesystem::file_size(extendTableFile); ASSERT_EQ(extendTableSize, hashtableConfig->getExtendTableBytes()); - extendTable = static_cast (mmap(NULL, extendTableSize, prot, flags, extendTableFd, offset)); - ASSERT_NE(MAP_FAILED, extendTable) << "failed to map extendTable file: " << strerror(errno); + extendTable = static_cast( + mmap(NULL, extendTableSize, prot, flags, extendTableFd, offset)); + ASSERT_NE(MAP_FAILED, extendTable) + << "failed to map extendTable file: " << strerror(errno); } - // As Hashtable can't be initialized after construction, it has to be built dynamically + // As Hashtable can't be initialized after construction, it has to be built + // dynamically hashtable.reset(new Hashtable(hashtableConfig.get(), table, extendTable)); - // We need the actual reference to check that the result of the queries is consistent with the actual reference + // We need the actual reference to check that the result of the queries is + // consistent with the actual reference const bfs::path referenceFile = referenceDir / "reference.bin"; - ASSERT_TRUE(exists(referenceFile)) << "checking the existence of the packed reference sequence: " << referenceFile; + ASSERT_TRUE(exists(referenceFile)) + << "checking the existence of the packed reference sequence: " + << referenceFile; referenceFileSize = file_size(referenceFile); referenceFd = open(referenceFile.c_str(), O_RDONLY, 0); - ASSERT_NE(-1, referenceFd) << "failed to open reference file " << referenceFile << ": " << strerror(errno); - referenceData = static_cast(mmap(NULL, referenceFileSize, prot, flags, referenceFd, offset)); - ASSERT_NE(MAP_FAILED, referenceData) << "failed to mmap reference file " << referenceFile << ": " << strerror(errno); + ASSERT_NE(-1, referenceFd) << "failed to open reference file " + << referenceFile << ": " << strerror(errno); + referenceData = static_cast( + mmap(NULL, referenceFileSize, prot, flags, referenceFd, offset)); + ASSERT_NE(MAP_FAILED, referenceData) + << "failed to mmap reference file " << referenceFile << ": " + << strerror(errno); } -void Environment::TearDown() -{ - if (-1 != hashtableFd) - { +void Environment::TearDown() { + if (-1 != hashtableFd) { close(hashtableFd); hashtableFd = -1; } - if (-1 != extendTableFd) - { + if (-1 != extendTableFd) { close(extendTableFd); extendTableFd = -1; } - if ((nullptr != table) && (MAP_FAILED != table)) - { - const auto tableSize = hashtableConfig->getHashtableBytes(); + if ((nullptr != table) && (MAP_FAILED != table)) { + const auto tableSize = hashtableConfig->getHashtableBytes(); munmap(table, tableSize); table = nullptr; } - if ((nullptr != extendTable) && (MAP_FAILED != extendTable)) - { + if ((nullptr != extendTable) && (MAP_FAILED != extendTable)) { const auto extendTableSize = hashtableConfig->getExtendTableBytes(); munmap(extendTable, extendTableSize); extendTable = nullptr; } hashtable.reset(nullptr); - if (-1 != referenceFd) close(referenceFd); - if ((nullptr != referenceData) && (MAP_FAILED != referenceData)) - { + if (-1 != referenceFd) + close(referenceFd); + if ((nullptr != referenceData) && (MAP_FAILED != referenceData)) { munmap(referenceData, referenceFileSize); referenceData = nullptr; } @@ -377,11 +394,12 @@ void Environment::TearDown() ExtendTableFixture::HashtableConfig *ExtendTableFixture::hashtableConfig; ExtendTableFixture::Hashtable *ExtendTableFixture::hashtable; ExtendTableFixture::ReferenceSequence ExtendTableFixture::referenceSequence; -void ExtendTableFixture::SetUpTestCase() -{ +void ExtendTableFixture::SetUpTestCase() { hashtableConfig = Environment::getHashtableConfig(); hashtable = Environment::getHashtable(); - referenceSequence.reset(hashtableConfig->getTrimmedRegions(), Environment::getReferenceData(), Environment::getReferenceSize()); + referenceSequence.reset(hashtableConfig->getTrimmedRegions(), + Environment::getReferenceData(), + Environment::getReferenceSize()); } int main(int argc, char **argv) { @@ -389,41 +407,45 @@ int main(int argc, char **argv) { //::testing::FLAGS_gtest_throw_on_failure = true; ::testing::FLAGS_gtest_throw_on_failure = false; globalTestEnvironment = new Environment; - /* ::testing::Environment* const env = */ ::testing::AddGlobalTestEnvironment(globalTestEnvironment); + /* ::testing::Environment* const env = */ ::testing::AddGlobalTestEnvironment( + globalTestEnvironment); return RUN_ALL_TESTS(); delete globalTestEnvironment; } // Hashes a data buffer using a CRC polynomial of the same length, bit-by-bit. -// The same-length requirement makes the hash reversible. No information is lost, -// and different data words are guaranteed to have different hashes. This means -// that a key match can be confirmed by comparing the hash alone, or rather the -// portion not implicitly matched by use as address bits. -void* crcHashSlow(int bits, void const* poly, void const* data, void* hash) -{ +// The same-length requirement makes the hash reversible. No information is +// lost, and different data words are guaranteed to have different hashes. This +// means that a key match can be confirmed by comparing the hash alone, or +// rather the portion not implicitly matched by use as address bits. +void *crcHashSlow(int bits, void const *poly, void const *data, void *hash) { int bytes = (bits + 7) >> 3, topByte = bytes - 1; - int topBitMask = (1 << ((bits + 7) % 8)), topByteMask = ((topBitMask << 1) - 1); + int topBitMask = (1 << ((bits + 7) % 8)), + topByteMask = ((topBitMask << 1) - 1); int i, j, subtract; -#define POLY ((unsigned char*)poly) -#define HASH ((unsigned char*)hash) - // Since data and polynomial are the same length, copy in all the data bytes immediately. - // This data order doesn't match normal CRC computation, which by processing byte zero first, - // effectively treats it as the most significant byte of the dividend. But with odd bit - // lengths, this works better. +#define POLY ((unsigned char *)poly) +#define HASH ((unsigned char *)hash) + // Since data and polynomial are the same length, copy in all the data bytes + // immediately. This data order doesn't match normal CRC computation, which by + // processing byte zero first, effectively treats it as the most significant + // byte of the dividend. But with odd bit lengths, this works better. memcpy(hash, data, bytes); // Loop through the bits for (i = 0; i < bits; i++) { // Plan to subtract the polynomial if the MSB is 1 subtract = (HASH[topByte] & topBitMask); - // Left-shift the remainder (corresponds to right-shifting the polynomial position) - for (j = topByte; j > 0; j--) HASH[j] = (HASH[j] << 1) | (HASH[j - 1] >> 7); + // Left-shift the remainder (corresponds to right-shifting the polynomial + // position) + for (j = topByte; j > 0; j--) + HASH[j] = (HASH[j] << 1) | (HASH[j - 1] >> 7); HASH[0] <<= 1; // Subtract the polynomial if required to cancel the MSB shifted out if (subtract) - for (j = 0; j < bytes; j++) HASH[j] ^= POLY[j]; + for (j = 0; j < bytes; j++) + HASH[j] ^= POLY[j]; } // Mask off unused positions in the top byte HASH[topByte] &= topByteMask; @@ -434,17 +456,18 @@ void* crcHashSlow(int bits, void const* poly, void const* data, void* hash) } // Optimized 64-bit version -void* crcHash64Init(int bits, void const* poly) -{ - int bytes = (bits + 7) >> 3, i, j; - int bufQw = (1 + 256 * bytes); - uint64_t *init = (uint64_t*)calloc(8, bufQw), *p = init; - uint64_t data; - - if (!init) return NULL; +void *crcHash64Init(int bits, void const *poly) { + int bytes = (bits + 7) >> 3, i, j; + int bufQw = (1 + 256 * bytes); + uint64_t *init = (uint64_t *)calloc(8, bufQw), *p = init; + uint64_t data; + + if (!init) + return NULL; // Store the byte count in the init buffer *p++ = bytes; - // Store the slow-hash of each byte value 0-255 in each byte position in a data buffer + // Store the slow-hash of each byte value 0-255 in each byte position in a + // data buffer for (i = 0; i < bytes; i++) { for (j = 0; j < 256; j++) { data = (uint64_t)j << (i << 3); @@ -455,10 +478,9 @@ void* crcHash64Init(int bits, void const* poly) } // Optimized 64-bit version -void* crcHash64(uint64_t* init, uint8_t const* data, uint64_t* hash) -{ - uint64_t h = 0; - int bytes = *init++; +void *crcHash64(uint64_t *init, uint8_t const *data, uint64_t *hash) { + uint64_t h = 0; + int bytes = *init++; while (bytes--) { h ^= init[*data++]; @@ -468,33 +490,37 @@ void* crcHash64(uint64_t* init, uint8_t const* data, uint64_t* hash) return hash; } -uint8_t encodebaseTo2Bits(const char c) -{ - switch (c) - { - case 'A': return 0; break; - case 'C': return 1; break; - case 'G': return 2; break; - case 'T': return 3; break; - default: throw std::invalid_argument(std::string("base must be ACGT: ") + c); +uint8_t encodebaseTo2Bits(const char c) { + switch (c) { + case 'A': + return 0; + break; + case 'C': + return 1; + break; + case 'G': + return 2; + break; + case 'T': + return 3; + break; + default: + throw std::invalid_argument(std::string("base must be ACGT: ") + c); }; } -//std::vector generateRefSeq(std::vector bases) -std::vector generateRefSeq(std::string bases) -{ +// std::vector generateRefSeq(std::vector bases) +std::vector generateRefSeq(std::string bases) { assert(0 == (bases.size() % 4)); std::vector refSeq; refSeq.resize((bases.size() + 3) / 4); uint8_t seqByte = 0; - for (unsigned i = 0; bases.size() > i; ++i) - { + for (unsigned i = 0; bases.size() > i; ++i) { const auto c = bases[i]; const uint8_t encoded = encodebaseTo2Bits(c); seqByte |= ((encoded & 3) << ((i & 3) << 2)); - if (3 == (i & 3)) - { - refSeq[i/4] = seqByte; + if (3 == (i & 3)) { + refSeq[i / 4] = seqByte; seqByte = 0; } } diff --git a/tests/HashtableGtest.cpp b/tests/HashtableGtest.cpp index 3d68c20..87ee616 100644 --- a/tests/HashtableGtest.cpp +++ b/tests/HashtableGtest.cpp @@ -1,51 +1,47 @@ #include "gtest/gtest.h" +#include +#include #include +#include +#include #include +#include #include #include -#include -#include -#include -#include -#include #include -#include -#include -#include #include +#include +#include +#include #include "common/Bits.hpp" -#include "sequences/Read.hpp" -#include "sequences/Seed.hpp" +#include "common/Exceptions.hpp" #include "reference/Hashtable.hpp" #include "reference/ReferenceSequence.hpp" -#include "common/Exceptions.hpp" +#include "sequences/Read.hpp" +#include "sequences/Seed.hpp" // copied from git/dragen/sec/common/hash_generation/crc_hash.c // implementation at end of file -void* crcHashSlow(int bits, void const* poly, void const* data, void* hash); -void* crcHash64Init(int bits, void const* poly); -void* crcHash64(uint64_t* init, uint8_t const* data, uint64_t* hash); +void *crcHashSlow(int bits, void const *poly, void const *data, void *hash); +void *crcHash64Init(int bits, void const *poly); +void *crcHash64(uint64_t *init, uint8_t const *data, uint64_t *hash); // generate refSeq as it is done in hash_table.c // encoding 4 bases per byte - ACGT -> 0123 -//std::vector generateRefSeq(std::vector bases); +// std::vector generateRefSeq(std::vector bases); std::vector generateRefSeq(std::string bases); -class HashtableFixture: public ::testing::Test -{ +class HashtableFixture : public ::testing::Test { public: - HashtableFixture() - { - } + HashtableFixture() {} // The name was changed in gtest 1.8: static void SetUpTestSuite() static void SetUpTestCase(); - static void TearDownTestCase() - { - } + static void TearDownTestCase() {} + protected: typedef dragenos::reference::HashtableConfig HashtableConfig; typedef dragenos::reference::HashtableTraits HashtableTraits; @@ -54,29 +50,29 @@ class HashtableFixture: public ::testing::Test static HashtableConfig *hashtableConfig; static Hashtable *hashtable; static ReferenceSequence referenceSequence; - //static const uint64_t fastqOffset = 64; // base quality offset, +64 or +33 - //static const bool convertCtoT = false; // whether to convert C->T for methylation - //static const bool convertGtoA = false; // whether to convert G->A for methylation + // static const uint64_t fastqOffset = 64; // base quality offset, +64 or +33 + // static const bool convertCtoT = false; // whether to convert C->T for + // methylation static const bool convertGtoA = false; // whether to + // convert G->A for methylation }; - class Environment : public ::testing::Environment { public: typedef dragenos::reference::HashtableConfig HashtableConfig; typedef dragenos::reference::HashtableTraits HashtableTraits; typedef dragenos::reference::Hashtable Hashtable; typedef dragenos::reference::ReferenceSequence ReferenceSequence; - Environment() : hashtableFd(-1), extendTableFd(-1), table(nullptr), extendTable(nullptr) - { + Environment() + : hashtableFd(-1), extendTableFd(-1), table(nullptr), + extendTable(nullptr) {} + virtual ~Environment() {} + static unsigned char *getReferenceData() { return referenceData; } + static size_t getReferenceSize() { return referenceFileSize; } + static Hashtable *getHashtable() { return hashtable.get(); } + static HashtableConfig *getHashtableConfig() { return hashtableConfig.get(); } + static const std::vector &getHashtableConfigText() { + return hashtableConfigText; } - virtual ~Environment() - { - } - static unsigned char *getReferenceData() {return referenceData;} - static size_t getReferenceSize() {return referenceFileSize;} - static Hashtable *getHashtable() {return hashtable.get();} - static HashtableConfig *getHashtableConfig() {return hashtableConfig.get();} - static const std::vector &getHashtableConfigText() {return hashtableConfigText;} // Override this to define how to set up the environment. void SetUp() override; @@ -84,15 +80,18 @@ class Environment : public ::testing::Environment { void TearDown() override; static const std::vector sequence; static const std::vector qualities; + private: static std::vector hashtableConfigText; static std::unique_ptr hashtableConfig; static std::unique_ptr hashtable; int hashtableFd; int extendTableFd; + public: - uint64_t* table; - uint64_t* extendTable; + uint64_t *table; + uint64_t *extendTable; + private: int referenceFd; static size_t referenceFileSize; @@ -100,35 +99,39 @@ class Environment : public ::testing::Environment { }; //::testing::Environment* globalTestEnvironment = nullptr; -Environment* globalTestEnvironment = nullptr; +Environment *globalTestEnvironment = nullptr; -std::pair keyAndValue(const std::string &line) -{ +std::pair keyAndValue(const std::string &line) { const auto equal = line.find(" = "); - if (std::string::npos == equal) - { - BOOST_THROW_EXCEPTION(std::invalid_argument(std::string("unexpected line: ") + line)); + if (std::string::npos == equal) { + BOOST_THROW_EXCEPTION( + std::invalid_argument(std::string("unexpected line: ") + line)); } auto key = line.substr(0, equal); - while (key.back() == ' ') key.pop_back(); + while (key.back() == ' ') + key.pop_back(); const auto value = line.substr(equal + 3); return std::pair(key, value); } - -struct TestRead -{ +struct TestRead { std::string name_; std::string bases_; std::string qualities_; - dragenos::sequences::Read::Name name() const {return dragenos::sequences::Read::Name(name_.begin(), name_.end());} - dragenos::sequences::Read::Bases bases() const {return dragenos::sequences::Read::Bases(bases_.begin(), bases_.end());} - dragenos::sequences::Read::Qualities qualities() const {return dragenos::sequences::Read::Qualities(qualities_.begin(), qualities_.end());} + dragenos::sequences::Read::Name name() const { + return dragenos::sequences::Read::Name(name_.begin(), name_.end()); + } + dragenos::sequences::Read::Bases bases() const { + return dragenos::sequences::Read::Bases(bases_.begin(), bases_.end()); + } + dragenos::sequences::Read::Qualities qualities() const { + return dragenos::sequences::Read::Qualities(qualities_.begin(), + qualities_.end()); + } }; -TEST_F(HashtableFixture, getBits) -{ +TEST_F(HashtableFixture, getBits) { using namespace dragenos::common::bits; const uint64_t v = 0xFEDCBA9876543210; ASSERT_EQ(v, (getBits<0, 64>(v))); @@ -150,171 +153,171 @@ TEST_F(HashtableFixture, getBits) ASSERT_EQ(15u, (getBits<60, 4>(v))); } -TEST_F(HashtableFixture, DISABLED_ExploreHhashtablev8) -{ - -} +TEST_F(HashtableFixture, DISABLED_ExploreHhashtablev8) {} -std::string intervalToString(const dragenos::reference::HashRecord hashRecord) -{ +std::string intervalToString(const dragenos::reference::HashRecord hashRecord) { std::ostringstream os; using dragenos::reference::HashRecord; const auto type = hashRecord.getType(); const auto value = hashRecord.getValue(); - switch (type) - { - case HashRecord::INTERVAL_SL: - os << "SL"; - if (!hashRecord.isReverseComplement()) - { - os << "0"; - os << " length: " << ((value >> 15) & 0x1ffUL); - os << " start: " << (value & 0x7fffUL); - } - else - { - os << "1"; - os << " length: " << ((value >> 8) & 0xffffUL); - os << " start: " << (value & 0xffUL); - } - break; - case HashRecord::INTERVAL_SLE: - os << "SLE"; - os << " exlift: " << ((value >> 16) & 0xffUL); - os << " length: " << ((value >> 8) & 0xffUL); + switch (type) { + case HashRecord::INTERVAL_SL: + os << "SL"; + if (!hashRecord.isReverseComplement()) { + os << "0"; + os << " length: " << ((value >> 15) & 0x1ffUL); + os << " start: " << (value & 0x7fffUL); + } else { + os << "1"; + os << " length: " << ((value >> 8) & 0xffffUL); os << " start: " << (value & 0xffUL); - break; - case HashRecord::INTERVAL_S: - os << "S "; - os << " start: " << (value & 0xffffffUL); - break; - case HashRecord::INTERVAL_L: - os << "L "; - os << " length: " << (value & 0xffffffUL); - break; - default: os << "unknown"; break; + } + break; + case HashRecord::INTERVAL_SLE: + os << "SLE"; + os << " exlift: " << ((value >> 16) & 0xffUL); + os << " length: " << ((value >> 8) & 0xffUL); + os << " start: " << (value & 0xffUL); + break; + case HashRecord::INTERVAL_S: + os << "S "; + os << " start: " << (value & 0xffffffUL); + break; + case HashRecord::INTERVAL_L: + os << "L "; + os << " length: " << (value & 0xffffffUL); + break; + default: + os << "unknown"; + break; } return os.str(); } -TEST_F(HashtableFixture, DISABLED_FindExtendTableIntervals) -{ - using dragenos::reference::HashRecord; +TEST_F(HashtableFixture, DISABLED_FindExtendTableIntervals) { using dragenos::reference::Bucket; + using dragenos::reference::HashRecord; - const Bucket * const buckets = reinterpret_cast(globalTestEnvironment->table); + const Bucket *const buckets = + reinterpret_cast(globalTestEnvironment->table); const size_t bucketCount = hashtableConfig->getHashtableBucketCount(); size_t count = 0; - for (size_t bucketId = 0; bucketCount > bucketId; ++ bucketId) - { - //bool inSet = false; - //const auto begin = reinterpret_cast(buckets + bucketId); - //const auto end = begin + 8; + for (size_t bucketId = 0; bucketCount > bucketId; ++bucketId) { + // bool inSet = false; + // const auto begin = reinterpret_cast(buckets + + // bucketId); const auto end = begin + 8; bool hasS = false; bool hasL = false; - for (const auto &hashRecord: buckets[bucketId]) - { + for (const auto &hashRecord : buckets[bucketId]) { const auto type = hashRecord.getType(); - if ((HashRecord::CHAIN_CON_MASK == type) || (HashRecord::CHAIN_CON_LIST == type)) - { + if ((HashRecord::CHAIN_CON_MASK == type) || + (HashRecord::CHAIN_CON_LIST == type)) { break; } - //if ((HashRecord::INTERVAL_SL == type) || (HashRecord::INTERVAL_SLE == type) ||(HashRecord::INTERVAL_S == type) ||(HashRecord::INTERVAL_L == type)) + // if ((HashRecord::INTERVAL_SL == type) || (HashRecord::INTERVAL_SLE == + // type) ||(HashRecord::INTERVAL_S == type) ||(HashRecord::INTERVAL_L == + // type)) //{ - // if (!inSet) std::cerr << " Bucket id: " << bucketId << std::endl; - // inSet = true; - // std::cerr << " record: " << std::hex << std::setfill('0') - // << std::setw(8) << (hashRecord.getValue() >> 32) << ":" << (hashRecord.getValue() & 0xffffffff) - // << ": " << intervalToString(hashRecord) - // << std::setfill(' ') << std::dec << std::endl; - //} + // if (!inSet) std::cerr << " Bucket id: " << bucketId << std::endl; + // inSet = true; + // std::cerr << " record: " << std::hex << std::setfill('0') + // << std::setw(8) << (hashRecord.getValue() >> 32) << ":" << + // (hashRecord.getValue() & 0xffffffff) + // << ": " << intervalToString(hashRecord) + // << std::setfill(' ') << std::dec << std::endl; + // } hasS |= (HashRecord::INTERVAL_S == type); hasL |= (HashRecord::INTERVAL_L == type); } - if (hasS && hasL) - { + if (hasS && hasL) { std::cerr << " Bucket id: " << bucketId << ":" << std::endl; - for (const auto &hashRecord: buckets[bucketId]) - { + for (const auto &hashRecord : buckets[bucketId]) { const auto type = hashRecord.getType(); const auto value = hashRecord.getValue(); std::cerr << " record: " << std::hex << std::setfill('0') - << std::setw(8) << (value >> 32) << ":" << (value & 0xffffffff); - if ((HashRecord::INTERVAL_SL == type) || (HashRecord::INTERVAL_SLE == type) ||(HashRecord::INTERVAL_S == type) ||(HashRecord::INTERVAL_L == type)) - { + << std::setw(8) << (value >> 32) << ":" + << (value & 0xffffffff); + if ((HashRecord::INTERVAL_SL == type) || + (HashRecord::INTERVAL_SLE == type) || + (HashRecord::INTERVAL_S == type) || + (HashRecord::INTERVAL_L == type)) { std::cerr << ": " << intervalToString(hashRecord); } std::cerr << std::setfill(' ') << std::dec << std::endl; } ++count; } - //count += inSet; - //if (count > 100) + // count += inSet; + // if (count > 100) //{ - // break; - //} + // break; + // } } } -TEST_F(HashtableFixture, ExploreExtendTable) -{ - const uint64_t * const extendTable = globalTestEnvironment->extendTable; +TEST_F(HashtableFixture, ExploreExtendTable) { + const uint64_t *const extendTable = globalTestEnvironment->extendTable; // First example: EXTEND followed by and interval: // record: 7bf9df58:f2300726 // record: 7bf9df59:f9000000: SLE exlift: 0 length: 0 start: 0 // record: 7bf9df58:fa582838: S start: 5777464 // record: 7bf9df5a:fb0168c4: L length: 92356 // - // Secod example: + // Secod example: // record: 044d5968:f2280559 // record: 044d5969:f9000000: SLE exlift: 0 length: 0 start: 0 // record: 044d5968:fa4092ac: S start: 4231852 // record: 044d596a:fb013a82: L length: 80514 // - //const size_t start = 4231852; + // const size_t start = 4231852; const size_t start = 0; - for (size_t i = 0; 10 > i; ++i) - { - std::cerr << std::hex << std::setfill('0') << std::setw(8) << (extendTable[start + i] >> 32) << ":" << std::setw(8) << (extendTable[start + i] & 0xffffffff) - << std::setfill(' ') << std::dec << std::endl; + for (size_t i = 0; 10 > i; ++i) { + std::cerr << std::hex << std::setfill('0') << std::setw(8) + << (extendTable[start + i] >> 32) << ":" << std::setw(8) + << (extendTable[start + i] & 0xffffffff) << std::setfill(' ') + << std::dec << std::endl; } } TEST_F(HashtableFixture, DISABLED_FindSeedsInTheBeginning) -//TEST_F(HashtableFixture, FindSeedsInTheBeginning) +// TEST_F(HashtableFixture, FindSeedsInTheBeginning) { using dragenos::reference::HashRecord; ASSERT_EQ(sizeof(uint64_t), sizeof(HashRecord)); const size_t hashtableBytes = hashtableConfig->getHashtableBytes(); const size_t hashtableRecords = hashtableBytes / 8; - const HashRecord * const table = reinterpret_cast(globalTestEnvironment->table); + const HashRecord *const table = + reinterpret_cast(globalTestEnvironment->table); std::vector records; records.reserve(100000); uint32_t minPosition = 999999999; bool printNext = false; - auto printRecord = [](size_t i, const HashRecord &record) - { - std::cerr << "i: " << std::setw(10) << i << ", position: " << std::setw(10) << record.getPosition() << std::hex << ", bucket: " << std::setw(8) << (i/8) - << ", record: " << std::setw(8) << (record.getValue() >> 32) << " " << std::setfill('0') << std::setw(2) << (unsigned)record.getThreadId() - << " " << std::setfill('0') << std::setw(6) << record.getHashBits() << std::setfill(' ') << " " << record.isExtendedSeed() << " " << record.isLastInThread() << " " << record.isReverseComplement() << std::dec << std::endl; + auto printRecord = [](size_t i, const HashRecord &record) { + std::cerr << "i: " << std::setw(10) << i << ", position: " << std::setw(10) + << record.getPosition() << std::hex + << ", bucket: " << std::setw(8) << (i / 8) + << ", record: " << std::setw(8) << (record.getValue() >> 32) + << " " << std::setfill('0') << std::setw(2) + << (unsigned)record.getThreadId() << " " << std::setfill('0') + << std::setw(6) << record.getHashBits() << std::setfill(' ') + << " " << record.isExtendedSeed() << " " + << record.isLastInThread() << " " << record.isReverseComplement() + << std::dec << std::endl; }; - for (size_t i = 0; hashtableRecords > i; ++i) - { + for (size_t i = 0; hashtableRecords > i; ++i) { const auto &record = table[i]; - if (printNext) - { + if (printNext) { printNext = record.isHit() && !record.isLastInThread(); printRecord(i, record); continue; } - if(record.isHit() && !record.isExtendedSeed() /* && !record.isReverseComplement() */ && (0 != record.getPosition())) - { - if (record.getPosition() < 180000) - { + if (record.isHit() && + !record.isExtendedSeed() /* && !record.isReverseComplement() */ && + (0 != record.getPosition())) { + if (record.getPosition() < 180000) { records.push_back(record); - //if (record.getPosition() <=163970) - if (record.getPosition() <=164015 + 20) - { + // if (record.getPosition() <=163970) + if (record.getPosition() <= 164015 + 20) { printRecord(i, record); printNext = !record.isLastInThread(); } @@ -322,204 +325,221 @@ TEST_F(HashtableFixture, DISABLED_FindSeedsInTheBeginning) minPosition = std::min(minPosition, record.getPosition()); } } - std::cerr << "size: " << records.size() << ", min: " << minPosition << std::endl; - // TODO: check that we have the good encoding and Crc hash for the seed at position 163840 -/* - -164012: 0035de20855c4d8e: 005937e5dcd: 0164df977: 05c4d8e: -164013: 002643a64efec778: 003f600b72c: 00fd802dc: 07ec778: -164014: 0024e8498f420b6f: 003d20b9d54: 00f482e75: 0420b6f: -164015: 003f4027ea80c9a2: 0068c2421c6: 01a309087: 000c9a2: -164016: 00299c8789f45cc2: 0044eb407c7: 0113ad01f: 0745cc2: -164017: 002c2baf9129799a: 0049285ac86: 0124a16b2: 029799a: -164018: 00003ec58be38a7d: 000067f72fa: 00019fdcb: 0638a7d: -164019: 0006515647af6f7b: 000a76b6e6a: 0029dadb9: 02f6f7b: -164020: 0001863cc7e83890: 00028654ab1: 000a1952a: 0683890: -164021: 001b7173f2541ca2: 002d73e8095: 00b5cfa02: 0541ca2: -164022: 002090d28fc16982: 0035efdcbe1: 00d7bf72f: 0416982: -164023: 002e68bad0a434ca: 004cdd75698: 013375d5a: 02434ca: -164024: 0000ae00db80d929: 000120316b8: 000480c5a: 000d929: -164025: 0006756713b7bbae: 000ab272b89: 002ac9cae: 037bbae: -164026: 001aa977ab443af0: 002c28ae33a: 00b0a2b8c: 0443af0: -164027: 001dbaa1297f1c3a: 00313d1aeca: 00c4f46bb: 07f1c3a: -164028: 00212226390ba9a4: 0036e08f4e7: 00db823d3: 00ba9a4: -164029: 001848898e42ea69: 00283823d39: 00a0e08f4: 042ea69: -164030: 00204cc546c7377e: 00357f26bd2: 00d5fc9af: 047377e: -164031: 0023271f3e1819c4: 003a38cbbed: 00e8e32ef: 01819c4: -164032: 0025b167d37bbcc0: 003e6dd3f63: 00f9b74fd: 07bbcc0: -164033: 00127ca53770fdb6: 001e9e71a3d: 007a79c68: 070fdb6: -164034: 0022d3a73e8851c7: 0039ae8cff9: 00e6ba33f: 00851c7: -164035: 0035de20855c4d8e: 005937e5dcd: 0164df977: 05c4d8e: -164036: 002643a64efec778: 003f600b72c: 00fd802dc: 07ec778: -164037: 0024e8498f420b6f: 003d20b9d54: 00f482e75: 0420b6f: -164038: 0009287b35d36195: 000f2b0c112: 003cac304: 0536195: -164039: 00127ca53770fdb6: 001e9e71a3d: 007a79c68: 070fdb6: -164040: 0022d3a73e8851c7: 0039ae8cff9: 00e6ba33f: 00851c7: -164041: 0035de20855c4d8e: 005937e5dcd: 0164df977: 05c4d8e: -164042: 002643a64efec778: 003f600b72c: 00fd802dc: 07ec778: -164043: 0024e8498f420b6f: 003d20b9d54: 00f482e75: 0420b6f: - - *********** Hashtable V8 ************************* -i: 85800915, position: 164020, bucket: a3a6fa, record: 5b41c480 16 683890 0 0 0 -i: 85800916, position: 491275458, bucket: a3a6fa, record: 5b41c480 16 683890 0 0 0 -i: 85800917, position: 2361018399, bucket: a3a6fa, record: 5b41c480 16 683890 0 0 0 -i: 85800918, position: 1387614846, bucket: a3a6fa, record: 5b41c481 16 683890 0 0 1 -i: 85800919, position: 2727213102, bucket: a3a6fa, record: 5b41c483 16 683890 0 1 1 -i: 351104461, position: 164019, bucket: 29dadb9, record: 357b7bd8 0d 2f6f7b 0 0 0 -i: 351104462, position: 2133813782, bucket: 29dadb9, record: 225d5733 08 4baae6 0 1 1 -i: 358933876, position: 164025, bucket: 2ac9cae, record: c5bddd70 31 37bbae 0 0 0 -i: 358933877, position: 955934793, bucket: 2ac9cae, record: dd60e332 37 2c1c66 0 1 0 -i: 405708885, position: 164030, bucket: 305d40a, record: e167ee01 38 2cfdc0 0 0 1 -i: 405708886, position: 491275468, bucket: 305d40a, record: e167ee01 38 2cfdc0 0 0 1 -i: 405708887, position: 2361018409, bucket: 305d40a, record: e167ee03 38 2cfdc0 0 1 1 -i: 1012682444, position: 164024, bucket: 78b89d9, record: 2c7107a9 0b 0e20f5 0 0 1 -i: 1012682445, position: 491275462, bucket: 78b89d9, record: 2c7107a9 0b 0e20f5 0 0 1 -i: 1012682446, position: 2271026583, bucket: 78b89d9, record: 2c7107a9 0b 0e20f5 0 0 1 -i: 1012682447, position: 4110514254, bucket: 78b89d9, record: f5f5771a 3d 3eaee3 0 1 0 -i: 1275560092, position: 164027, bucket: 980f013, record: 457b5321 11 2f6a64 0 0 1 -i: 1275560093, position: 491275465, bucket: 980f013, record: 457b5321 11 2f6a64 0 0 1 -i: 1275560094, position: 1068032020, bucket: 980f013, record: 457b5321 11 2f6a64 0 0 1 -i: 1275560095, position: 2361018406, bucket: 980f013, record: 457b5323 11 2f6a64 0 1 1 -i: 1276756702, position: 164018, bucket: 983385b, record: 5a55cd29 16 4ab9a5 0 0 1 -i: 1276756703, position: 491275456, bucket: 983385b, record: 5a55cd2b 16 4ab9a5 0 1 1 -i: 1349535650, position: 164029, bucket: a0e08f4, record: 9e175348 27 42ea69 0 0 0 -i: 1349535651, position: 491275467, bucket: a0e08f4, record: 9e175348 27 42ea69 0 0 0 -i: 1349535652, position: 2361018408, bucket: a0e08f4, record: 9e175348 27 42ea69 0 0 0 -i: 1349535653, position: 1717893350, bucket: a0e08f4, record: 9e175349 27 42ea69 0 0 1 -i: 1349535654, position: 1810367446, bucket: a0e08f4, record: 9e175349 27 42ea69 0 0 1 -i: 1349535655, position: 4110471557, bucket: a0e08f4, record: a5a5a569 29 34b4ad 0 0 1 -i: 1481727078, position: 164026, bucket: b0a2b8c, record: 9e21d780 27 443af0 0 0 0 -i: 1481727079, position: 4110671682, bucket: b0a2b8c, record: f0f0e6c6 3c 1e1cd8 1 1 0 -i: 1809824126, position: 164022, bucket: d7bf72f, record: f20b4c10 3c 416982 0 0 0 -i: 1809824127, position: 4110582041, bucket: d7bf72f, record: 82828282 20 505050 0 1 0 -i: 1841372825, position: 164028, bucket: db823d3, record: 705d4d20 1c 0ba9a4 0 0 0 -i: 1841372826, position: 491275466, bucket: db823d3, record: 705d4d20 1c 0ba9a4 0 0 0 -i: 1841372827, position: 2361018407, bucket: db823d3, record: 705d4d20 1c 0ba9a4 0 0 0 -i: 1841372828, position: 1173208795, bucket: db823d3, record: 705d4d21 1c 0ba9a4 0 0 1 -i: 1841372829, position: 1810367447, bucket: db823d3, record: 705d4d21 1c 0ba9a4 0 0 1 -i: 1841372830, position: 2524182161, bucket: db823d3, record: 705d4d21 1c 0ba9a4 0 0 1 -i: 1841372831, position: 4110596772, bucket: db823d3, record: a4a4a4a4 29 149494 1 0 0 -i: 1854408244, position: 164021, bucket: dd100c6, record: da996679 36 532ccf 0 0 1 -i: 1854408245, position: 491275459, bucket: dd100c6, record: da996679 36 532ccf 0 0 1 -i: 1854408246, position: 2271026580, bucket: dd100c6, record: da996679 36 532ccf 0 0 1 -i: 1854408247, position: 2361018400, bucket: dd100c6, record: da99667b 36 532ccf 0 1 1 -i: 2312536315, position: 164016, bucket: 113ad01f, record: e3a2e610 38 745cc2 0 0 0 -i: 2312536316, position: 181960267, bucket: 113ad01f, record: f37301c2 3c 6e6038 0 1 0 -i: 2454762901, position: 164017, bucket: 124a16b2, record: 414bccd0 10 29799a 0 0 0 -i: 2454762902, position: 491275455, bucket: 124a16b2, record: 414bccd0 10 29799a 0 0 0 -i: 2454762903, position: 4110529313, bucket: 124a16b2, record: 9a9a9a9a 26 535353 0 1 0 -i: 2579163857, position: 164023, bucket: 13375d5a, record: 4d21a650 13 2434ca 0 0 0 -i: 2579163858, position: 491275461, bucket: 13375d5a, record: 4d21a650 13 2434ca 0 0 0 -i: 2579163859, position: 2271026582, bucket: 13375d5a, record: 4d21a650 13 2434ca 0 0 0 -i: 2579163860, position: 2289834295, bucket: 13375d5a, record: 4d21a650 13 2434ca 0 0 0 -i: 2579163861, position: 2361018402, bucket: 13375d5a, record: 4d21a650 13 2434ca 0 0 0 -i: 2579163862, position: 2727213099, bucket: 13375d5a, record: 4d21a651 13 2434ca 0 0 1 -i: 2579163863, position: 4110431460, bucket: 13375d5a, record: e3c5524c 38 78aa49 1 0 0 -i: 3246648837, position: 164015, bucket: 18307dc0, record: 1f04ff9 00 3e09ff 0 0 1 -i: 3246648838, position: 18408227, bucket: 18307dc0, record: 1f04ff9 00 3e09ff 0 0 1 -i: 3246648839, position: 4110543918, bucket: 18307dc0, record: ffffffff 3f 7fffff 1 1 1 -size: 15492, min: 164015 -*/ -/* - ********* Hashtable V7 ************ -i: 88104482, position: 163857, bucket: a80bc4, record: 8738b4d0 21 67169a 0 0 0 -i: 88104483, position: 1170195791, bucket: a80bc4, record: 9950b5bd 26 2a16b7 1 0 1 -i: 88104484, position: 2527274721, bucket: a80bc4, record: 87940810 21 728102 0 0 0 -i: 88104485, position: 1881874082, bucket: a80bc4, record: 879aa2b3 21 735456 0 1 1 -i: 326266458, position: 163868, bucket: 26e4dcb, record: 73786250 1c 6f0c4a 0 0 0 -i: 326266459, position: 2651802530, bucket: 26e4dcb, record: 73a6f3a2 1c 74de74 0 1 0 -i: 585378490, position: 163843, bucket: 45c8557, record: ec122c3a 3b 024587 0 1 0 -i: 719018456, position: 163858, bucket: 55b6b3b, record: 69808dea 1a 3011bd 0 1 0 -i: 945034968, position: 163854, bucket: 70a82db, record: 661a89d2 19 43513a 0 1 0 -i: 1175941496, position: 163866, bucket: 8c2ee2f, record: e3fa4cda 38 7f499b 0 1 0 -i: 1246235720, position: 163863, bucket: 9490189, record: 20c7db5a 08 18fb6b 0 1 0 -i: 1251782843, position: 163855, bucket: 9539617, record: feb8969a 3f 5712d3 0 1 0 -i: 1305065840, position: 163867, bucket: 9b9372e, record: c5e18942 31 3c3128 0 1 0 -i: 1317662010, position: 163850, bucket: 9d13da7, record: fa917410 3e 522e82 0 0 0 -i: 1317662011, position: 1866893293, bucket: 9d13da7, record: faa3077c 3e 5460ef 1 0 0 -i: 1317662012, position: 2461380759, bucket: 9d13da7, record: faa3077d 3e 5460ef 1 0 1 -i: 1317662013, position: 1936430087, bucket: 9d13da7, record: faa3077d 3e 5460ef 1 0 1 -i: 1317662014, position: 655167895, bucket: 9d13da7, record: faa3077d 3e 5460ef 1 0 1 -i: 1317662015, position: 4110659670, bucket: 9d13da7, record: efefefef 3b 7dfdfd 1 1 1 -i: 1319795192, position: 163845, bucket: 9d54f3f, record: e93331e2 3a 26663c 0 1 0 -i: 1528050906, position: 163848, bucket: b62869b, record: 7b4ea3ea 1e 69d47d 0 1 0 -i: 1620992618, position: 163860, bucket: c13cc4d, record: ac3d96f2 2b 07b2de 0 1 0 -i: 2229760632, position: 163851, bucket: 109cee4f, record: f8072852 3e 00e50a 0 1 0 -i: 2259045819, position: 163852, bucket: 10d4c9b7, record: f34f6aca 3c 69ed59 0 1 0 -i: 2461650632, position: 163840, bucket: 125739d9, record: 20f75d8a 08 1eebb1 0 1 0 -i: 2615785281, position: 163856, bucket: 137d36e8, record: 195cea5a 06 2b9d4b 0 1 0 -i: 2617211309, position: 163864, bucket: 137fef35, record: b0c33928 2c 186725 0 0 0 -i: 2617211310, position: 2212288375, bucket: 137fef35, record: b6394d98 2d 4729b3 0 0 0 -i: 2617211311, position: 4110489025, bucket: 137fef35, record: b8b8b8ad 2e 171715 1 0 1 -i: 3540465321, position: 163846, bucket: 1a60e6d5, record: b5a119f0 2d 34233e 0 0 0 -i: 3540465322, position: 2705840826, bucket: 1a60e6d5, record: a6a1c890 29 543912 0 0 0 -i: 3540465323, position: 2421611295, bucket: 1a60e6d5, record: a6da2182 29 5b4430 0 1 0 -i: 3546073317, position: 163847, bucket: 1a6b991c, record: 93cb332a 24 796665 0 1 0 -i: 3977797539, position: 163844, bucket: 1da30bf4, record: 89f644f0 22 3ec89e 0 0 0 -i: 3977797540, position: 821505477, bucket: 1da30bf4, record: 924ec0dc 24 49d81b 1 0 0 -i: 3977797541, position: 1432481257, bucket: 1da30bf4, record: 95861ce0 25 30c39c 0 0 0 -i: 3977797542, position: 81847960, bucket: 1da30bf4, record: 8a5f0a2a 22 4be145 0 1 0 -*/ + std::cerr << "size: " << records.size() << ", min: " << minPosition + << std::endl; + // TODO: check that we have the good encoding and Crc hash for the seed at + // position 163840 + /* + + 164012: 0035de20855c4d8e: 005937e5dcd: 0164df977: 05c4d8e: + 164013: 002643a64efec778: 003f600b72c: 00fd802dc: 07ec778: + 164014: 0024e8498f420b6f: 003d20b9d54: 00f482e75: 0420b6f: + 164015: 003f4027ea80c9a2: 0068c2421c6: 01a309087: 000c9a2: + 164016: 00299c8789f45cc2: 0044eb407c7: 0113ad01f: 0745cc2: + 164017: 002c2baf9129799a: 0049285ac86: 0124a16b2: 029799a: + 164018: 00003ec58be38a7d: 000067f72fa: 00019fdcb: 0638a7d: + 164019: 0006515647af6f7b: 000a76b6e6a: 0029dadb9: 02f6f7b: + 164020: 0001863cc7e83890: 00028654ab1: 000a1952a: 0683890: + 164021: 001b7173f2541ca2: 002d73e8095: 00b5cfa02: 0541ca2: + 164022: 002090d28fc16982: 0035efdcbe1: 00d7bf72f: 0416982: + 164023: 002e68bad0a434ca: 004cdd75698: 013375d5a: 02434ca: + 164024: 0000ae00db80d929: 000120316b8: 000480c5a: 000d929: + 164025: 0006756713b7bbae: 000ab272b89: 002ac9cae: 037bbae: + 164026: 001aa977ab443af0: 002c28ae33a: 00b0a2b8c: 0443af0: + 164027: 001dbaa1297f1c3a: 00313d1aeca: 00c4f46bb: 07f1c3a: + 164028: 00212226390ba9a4: 0036e08f4e7: 00db823d3: 00ba9a4: + 164029: 001848898e42ea69: 00283823d39: 00a0e08f4: 042ea69: + 164030: 00204cc546c7377e: 00357f26bd2: 00d5fc9af: 047377e: + 164031: 0023271f3e1819c4: 003a38cbbed: 00e8e32ef: 01819c4: + 164032: 0025b167d37bbcc0: 003e6dd3f63: 00f9b74fd: 07bbcc0: + 164033: 00127ca53770fdb6: 001e9e71a3d: 007a79c68: 070fdb6: + 164034: 0022d3a73e8851c7: 0039ae8cff9: 00e6ba33f: 00851c7: + 164035: 0035de20855c4d8e: 005937e5dcd: 0164df977: 05c4d8e: + 164036: 002643a64efec778: 003f600b72c: 00fd802dc: 07ec778: + 164037: 0024e8498f420b6f: 003d20b9d54: 00f482e75: 0420b6f: + 164038: 0009287b35d36195: 000f2b0c112: 003cac304: 0536195: + 164039: 00127ca53770fdb6: 001e9e71a3d: 007a79c68: 070fdb6: + 164040: 0022d3a73e8851c7: 0039ae8cff9: 00e6ba33f: 00851c7: + 164041: 0035de20855c4d8e: 005937e5dcd: 0164df977: 05c4d8e: + 164042: 002643a64efec778: 003f600b72c: 00fd802dc: 07ec778: + 164043: 0024e8498f420b6f: 003d20b9d54: 00f482e75: 0420b6f: + + *********** Hashtable V8 ************************* + i: 85800915, position: 164020, bucket: a3a6fa, record: 5b41c480 16 + 683890 0 0 0 i: 85800916, position: 491275458, bucket: a3a6fa, record: + 5b41c480 16 683890 0 0 0 i: 85800917, position: 2361018399, bucket: a3a6fa, + record: 5b41c480 16 683890 0 0 0 i: 85800918, position: 1387614846, bucket: + a3a6fa, record: 5b41c481 16 683890 0 0 1 i: 85800919, position: 2727213102, + bucket: a3a6fa, record: 5b41c483 16 683890 0 1 1 i: 351104461, position: + 164019, bucket: 29dadb9, record: 357b7bd8 0d 2f6f7b 0 0 0 i: 351104462, + position: 2133813782, bucket: 29dadb9, record: 225d5733 08 4baae6 0 1 1 i: + 358933876, position: 164025, bucket: 2ac9cae, record: c5bddd70 31 37bbae + 0 0 0 i: 358933877, position: 955934793, bucket: 2ac9cae, record: dd60e332 + 37 2c1c66 0 1 0 i: 405708885, position: 164030, bucket: 305d40a, record: + e167ee01 38 2cfdc0 0 0 1 i: 405708886, position: 491275468, bucket: 305d40a, + record: e167ee01 38 2cfdc0 0 0 1 i: 405708887, position: 2361018409, bucket: + 305d40a, record: e167ee03 38 2cfdc0 0 1 1 i: 1012682444, position: 164024, + bucket: 78b89d9, record: 2c7107a9 0b 0e20f5 0 0 1 i: 1012682445, position: + 491275462, bucket: 78b89d9, record: 2c7107a9 0b 0e20f5 0 0 1 i: 1012682446, + position: 2271026583, bucket: 78b89d9, record: 2c7107a9 0b 0e20f5 0 0 1 i: + 1012682447, position: 4110514254, bucket: 78b89d9, record: f5f5771a 3d 3eaee3 + 0 1 0 i: 1275560092, position: 164027, bucket: 980f013, record: 457b5321 + 11 2f6a64 0 0 1 i: 1275560093, position: 491275465, bucket: 980f013, record: + 457b5321 11 2f6a64 0 0 1 i: 1275560094, position: 1068032020, bucket: 980f013, + record: 457b5321 11 2f6a64 0 0 1 i: 1275560095, position: 2361018406, bucket: + 980f013, record: 457b5323 11 2f6a64 0 1 1 i: 1276756702, position: 164018, + bucket: 983385b, record: 5a55cd29 16 4ab9a5 0 0 1 i: 1276756703, position: + 491275456, bucket: 983385b, record: 5a55cd2b 16 4ab9a5 0 1 1 i: 1349535650, + position: 164029, bucket: a0e08f4, record: 9e175348 27 42ea69 0 0 0 i: + 1349535651, position: 491275467, bucket: a0e08f4, record: 9e175348 27 42ea69 + 0 0 0 i: 1349535652, position: 2361018408, bucket: a0e08f4, record: 9e175348 + 27 42ea69 0 0 0 i: 1349535653, position: 1717893350, bucket: a0e08f4, record: + 9e175349 27 42ea69 0 0 1 i: 1349535654, position: 1810367446, bucket: a0e08f4, + record: 9e175349 27 42ea69 0 0 1 i: 1349535655, position: 4110471557, bucket: + a0e08f4, record: a5a5a569 29 34b4ad 0 0 1 i: 1481727078, position: 164026, + bucket: b0a2b8c, record: 9e21d780 27 443af0 0 0 0 i: 1481727079, position: + 4110671682, bucket: b0a2b8c, record: f0f0e6c6 3c 1e1cd8 1 1 0 i: 1809824126, + position: 164022, bucket: d7bf72f, record: f20b4c10 3c 416982 0 0 0 i: + 1809824127, position: 4110582041, bucket: d7bf72f, record: 82828282 20 505050 + 0 1 0 i: 1841372825, position: 164028, bucket: db823d3, record: 705d4d20 + 1c 0ba9a4 0 0 0 i: 1841372826, position: 491275466, bucket: db823d3, record: + 705d4d20 1c 0ba9a4 0 0 0 i: 1841372827, position: 2361018407, bucket: db823d3, + record: 705d4d20 1c 0ba9a4 0 0 0 i: 1841372828, position: 1173208795, bucket: + db823d3, record: 705d4d21 1c 0ba9a4 0 0 1 i: 1841372829, position: 1810367447, + bucket: db823d3, record: 705d4d21 1c 0ba9a4 0 0 1 i: 1841372830, position: + 2524182161, bucket: db823d3, record: 705d4d21 1c 0ba9a4 0 0 1 i: 1841372831, + position: 4110596772, bucket: db823d3, record: a4a4a4a4 29 149494 1 0 0 i: + 1854408244, position: 164021, bucket: dd100c6, record: da996679 36 532ccf + 0 0 1 i: 1854408245, position: 491275459, bucket: dd100c6, record: da996679 + 36 532ccf 0 0 1 i: 1854408246, position: 2271026580, bucket: dd100c6, record: + da996679 36 532ccf 0 0 1 i: 1854408247, position: 2361018400, bucket: dd100c6, + record: da99667b 36 532ccf 0 1 1 i: 2312536315, position: 164016, bucket: + 113ad01f, record: e3a2e610 38 745cc2 0 0 0 i: 2312536316, position: 181960267, + bucket: 113ad01f, record: f37301c2 3c 6e6038 0 1 0 i: 2454762901, position: + 164017, bucket: 124a16b2, record: 414bccd0 10 29799a 0 0 0 i: 2454762902, + position: 491275455, bucket: 124a16b2, record: 414bccd0 10 29799a 0 0 0 i: + 2454762903, position: 4110529313, bucket: 124a16b2, record: 9a9a9a9a 26 535353 + 0 1 0 i: 2579163857, position: 164023, bucket: 13375d5a, record: 4d21a650 + 13 2434ca 0 0 0 i: 2579163858, position: 491275461, bucket: 13375d5a, record: + 4d21a650 13 2434ca 0 0 0 i: 2579163859, position: 2271026582, bucket: + 13375d5a, record: 4d21a650 13 2434ca 0 0 0 i: 2579163860, position: + 2289834295, bucket: 13375d5a, record: 4d21a650 13 2434ca 0 0 0 i: 2579163861, + position: 2361018402, bucket: 13375d5a, record: 4d21a650 13 2434ca 0 0 0 i: + 2579163862, position: 2727213099, bucket: 13375d5a, record: 4d21a651 13 2434ca + 0 0 1 i: 2579163863, position: 4110431460, bucket: 13375d5a, record: e3c5524c + 38 78aa49 1 0 0 i: 3246648837, position: 164015, bucket: 18307dc0, record: + 1f04ff9 00 3e09ff 0 0 1 i: 3246648838, position: 18408227, bucket: 18307dc0, + record: 1f04ff9 00 3e09ff 0 0 1 i: 3246648839, position: 4110543918, bucket: + 18307dc0, record: ffffffff 3f 7fffff 1 1 1 size: 15492, min: 164015 + */ + /* + ********* Hashtable V7 ************ + i: 88104482, position: 163857, bucket: a80bc4, record: 8738b4d0 21 + 67169a 0 0 0 i: 88104483, position: 1170195791, bucket: a80bc4, record: + 9950b5bd 26 2a16b7 1 0 1 i: 88104484, position: 2527274721, bucket: a80bc4, + record: 87940810 21 728102 0 0 0 i: 88104485, position: 1881874082, bucket: + a80bc4, record: 879aa2b3 21 735456 0 1 1 i: 326266458, position: 163868, + bucket: 26e4dcb, record: 73786250 1c 6f0c4a 0 0 0 i: 326266459, position: + 2651802530, bucket: 26e4dcb, record: 73a6f3a2 1c 74de74 0 1 0 i: 585378490, + position: 163843, bucket: 45c8557, record: ec122c3a 3b 024587 0 1 0 i: + 719018456, position: 163858, bucket: 55b6b3b, record: 69808dea 1a 3011bd + 0 1 0 i: 945034968, position: 163854, bucket: 70a82db, record: 661a89d2 + 19 43513a 0 1 0 i: 1175941496, position: 163866, bucket: 8c2ee2f, record: + e3fa4cda 38 7f499b 0 1 0 i: 1246235720, position: 163863, bucket: 9490189, + record: 20c7db5a 08 18fb6b 0 1 0 i: 1251782843, position: 163855, bucket: + 9539617, record: feb8969a 3f 5712d3 0 1 0 i: 1305065840, position: 163867, + bucket: 9b9372e, record: c5e18942 31 3c3128 0 1 0 i: 1317662010, position: + 163850, bucket: 9d13da7, record: fa917410 3e 522e82 0 0 0 i: 1317662011, + position: 1866893293, bucket: 9d13da7, record: faa3077c 3e 5460ef 1 0 0 i: + 1317662012, position: 2461380759, bucket: 9d13da7, record: faa3077d 3e 5460ef + 1 0 1 i: 1317662013, position: 1936430087, bucket: 9d13da7, record: faa3077d + 3e 5460ef 1 0 1 i: 1317662014, position: 655167895, bucket: 9d13da7, record: + faa3077d 3e 5460ef 1 0 1 i: 1317662015, position: 4110659670, bucket: 9d13da7, + record: efefefef 3b 7dfdfd 1 1 1 i: 1319795192, position: 163845, bucket: + 9d54f3f, record: e93331e2 3a 26663c 0 1 0 i: 1528050906, position: 163848, + bucket: b62869b, record: 7b4ea3ea 1e 69d47d 0 1 0 i: 1620992618, position: + 163860, bucket: c13cc4d, record: ac3d96f2 2b 07b2de 0 1 0 i: 2229760632, + position: 163851, bucket: 109cee4f, record: f8072852 3e 00e50a 0 1 0 i: + 2259045819, position: 163852, bucket: 10d4c9b7, record: f34f6aca 3c 69ed59 + 0 1 0 i: 2461650632, position: 163840, bucket: 125739d9, record: 20f75d8a + 08 1eebb1 0 1 0 i: 2615785281, position: 163856, bucket: 137d36e8, record: + 195cea5a 06 2b9d4b 0 1 0 i: 2617211309, position: 163864, bucket: + 137fef35, record: b0c33928 2c 186725 0 0 0 i: 2617211310, position: + 2212288375, bucket: 137fef35, record: b6394d98 2d 4729b3 0 0 0 i: 2617211311, + position: 4110489025, bucket: 137fef35, record: b8b8b8ad 2e 171715 1 0 1 i: + 3540465321, position: 163846, bucket: 1a60e6d5, record: b5a119f0 2d 34233e + 0 0 0 i: 3540465322, position: 2705840826, bucket: 1a60e6d5, record: a6a1c890 + 29 543912 0 0 0 i: 3540465323, position: 2421611295, bucket: 1a60e6d5, record: + a6da2182 29 5b4430 0 1 0 i: 3546073317, position: 163847, bucket: + 1a6b991c, record: 93cb332a 24 796665 0 1 0 i: 3977797539, position: 163844, + bucket: 1da30bf4, record: 89f644f0 22 3ec89e 0 0 0 i: 3977797540, position: + 821505477, bucket: 1da30bf4, record: 924ec0dc 24 49d81b 1 0 0 i: 3977797541, + position: 1432481257, bucket: 1da30bf4, record: 95861ce0 25 30c39c 0 0 0 i: + 3977797542, position: 81847960, bucket: 1da30bf4, record: 8a5f0a2a 22 4be145 + 0 1 0 + */ } -TEST_F(HashtableFixture, CheckHashtableConfig) -{ -std::cerr << "\n---------------------------" << std::endl; +TEST_F(HashtableFixture, CheckHashtableConfig) { + std::cerr << "\n---------------------------" << std::endl; const auto hashtableConfig = Environment::getHashtableConfig(); - using dragenos::sequences::CrcPolynomial; using dragenos::sequences::CrcHasher; - CrcPolynomial priCrcPolynomial(hashtableConfig->getPrimaryCrcBits(), hashtableConfig->getPriCrcPoly()); - ASSERT_TRUE(priCrcPolynomial == "2C991CE6A8DD55"); // the value from hash_table.cfg - CrcPolynomial secCrcPolynomial(hashtableConfig->getSecondaryCrcBits(), hashtableConfig->getSecCrcPoly()); - ASSERT_TRUE(secCrcPolynomial == "1524CA66E8D39"); // the value from hash_table.cfg + using dragenos::sequences::CrcPolynomial; + CrcPolynomial priCrcPolynomial(hashtableConfig->getPrimaryCrcBits(), + hashtableConfig->getPriCrcPoly()); + ASSERT_TRUE(priCrcPolynomial == + "2C991CE6A8DD55"); // the value from hash_table.cfg + CrcPolynomial secCrcPolynomial(hashtableConfig->getSecondaryCrcBits(), + hashtableConfig->getSecCrcPoly()); + ASSERT_TRUE(secCrcPolynomial == + "1524CA66E8D39"); // the value from hash_table.cfg CrcHasher priCrcHasher(priCrcPolynomial); CrcHasher secCrcHasher(secCrcPolynomial); ASSERT_EQ(21u, hashtableConfig->getPrimarySeedBases()); } -std::ostream &printBases(std::ostream &os, const dragenos::sequences::Read &read) -{ +std::ostream &printBases(std::ostream &os, + const dragenos::sequences::Read &read) { using dragenos::sequences::Read; - for(auto b: read.getBases()) os << (unsigned)b << " "; - for(auto b: read.getBases()) os << Read::decodeBase(b); + for (auto b : read.getBases()) + os << (unsigned)b << " "; + for (auto b : read.getBases()) + os << Read::decodeBase(b); return os; } // Encode a sequence with 2 bases per byte into a sequence with 4 bases per byte -std::vector encode4bpbTo2bpb(const std::vector sequence) -{ - auto encode = [] (unsigned char b) - { - switch (b) - { - case 2: return 1; - case 4: return 2; - case 8: return 3; - default: return 0; +std::vector +encode4bpbTo2bpb(const std::vector sequence) { + auto encode = [](unsigned char b) { + switch (b) { + case 2: + return 1; + case 4: + return 2; + case 8: + return 3; + default: + return 0; } }; std::vector encoded; bool even = true; - for (const uint8_t b4x2: sequence) - { + for (const uint8_t b4x2 : sequence) { // convert the 2x 4 bits into 2x 2bits const uint8_t b2x2 = encode(b4x2 & 0xF) | (encode(b4x2 >> 4) << 2); - if (even) - { + if (even) { encoded.push_back(b2x2); - } - else - { + } else { encoded.back() |= (b2x2 << 4); } - even = ! even; + even = !even; } - return encoded; + return encoded; } -TEST_F(HashtableFixture, CheckReferenceBasesHashtableV8) -{ - //Chr1 starts at position 163840 - 9984 first bases (Ns) are trimmed. +TEST_F(HashtableFixture, CheckReferenceBasesHashtableV8) { + // Chr1 starts at position 163840 - 9984 first bases (Ns) are trimmed. ASSERT_EQ(163840u, hashtableConfig->getSequences()[0].seqStart); ASSERT_EQ(9984u, hashtableConfig->getSequences()[0].begTrim); // Beginning of the untrimmed sequence from FASTA: @@ -533,15 +553,18 @@ TEST_F(HashtableFixture, CheckReferenceBasesHashtableV8) // TAACCCTAACCCCTAACCCTAACCCTAACCCTAACCCTCGCGGTACCCTCAGCCGGCCCGCCCGCCCGGG // // corresponding hexdump from reference.bin: - const std::vector sequence { - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x18, 0x21, 0x22, 0x18, 0x21, 0x22, 0x18, 0x21, - 0x22, 0x18, 0x21, 0x22, 0x18, 0x21, 0x22, 0x18, 0x21, 0x22, 0x18, 0x21, 0x22, 0x18, 0x21, 0x22, - 0x18, 0x21, 0x22, 0x18, 0x21, 0x22, 0x18, 0x21, 0x22, 0x18, 0x21, 0x22, 0x18, 0x21, 0x22, 0x18, - 0x21, 0x22, 0x18, 0x21, 0x22, 0x18, 0x21, 0x22, 0x18, 0x21, 0x22, 0x18, 0x21, 0x22, 0x11, 0x22, - 0x82, 0x11, 0x22, 0x82, 0x11, 0x22, 0x82, 0x11, 0x22, 0x82, 0x11, 0x22, 0x82, 0x11, 0x22, 0x82, - 0x11, 0x22, 0x22, 0x18, 0x21, 0x22, 0x18, 0x21, 0x22, 0x18, 0x21, 0x22, 0x18, 0x21, 0x22, 0x18, - 0x21, 0x82, 0x11, 0x22, 0x82, 0x11, 0x22, 0x82, 0x11, 0x22, 0x82, 0x11, 0x22, 0x82, 0x11, 0x22, - 0x82, 0x11, 0x22, 0x82, 0x11, 0x22, 0x82, 0x11, 0x22, 0x82, 0x11, 0x22, 0x22, 0x18, 0x21, 0x22}; + const std::vector sequence{ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x18, 0x21, 0x22, 0x18, + 0x21, 0x22, 0x18, 0x21, 0x22, 0x18, 0x21, 0x22, 0x18, 0x21, 0x22, 0x18, + 0x21, 0x22, 0x18, 0x21, 0x22, 0x18, 0x21, 0x22, 0x18, 0x21, 0x22, 0x18, + 0x21, 0x22, 0x18, 0x21, 0x22, 0x18, 0x21, 0x22, 0x18, 0x21, 0x22, 0x18, + 0x21, 0x22, 0x18, 0x21, 0x22, 0x18, 0x21, 0x22, 0x18, 0x21, 0x22, 0x18, + 0x21, 0x22, 0x11, 0x22, 0x82, 0x11, 0x22, 0x82, 0x11, 0x22, 0x82, 0x11, + 0x22, 0x82, 0x11, 0x22, 0x82, 0x11, 0x22, 0x82, 0x11, 0x22, 0x22, 0x18, + 0x21, 0x22, 0x18, 0x21, 0x22, 0x18, 0x21, 0x22, 0x18, 0x21, 0x22, 0x18, + 0x21, 0x82, 0x11, 0x22, 0x82, 0x11, 0x22, 0x82, 0x11, 0x22, 0x82, 0x11, + 0x22, 0x82, 0x11, 0x22, 0x82, 0x11, 0x22, 0x82, 0x11, 0x22, 0x82, 0x11, + 0x22, 0x82, 0x11, 0x22, 0x22, 0x18, 0x21, 0x22}; const std::vector encoded2bpb = encode4bpbTo2bpb(sequence); ASSERT_EQ(sequence.size(), encoded2bpb.size() * 2); @@ -555,23 +578,22 @@ TEST_F(HashtableFixture, CheckReferenceBasesHashtableV8) ASSERT_EQ(128u, sequence.size()); ASSERT_EQ(0x00014000 * 2, 163840); - for (size_t i = 0; 0x14000 > i; ++i) - { + for (size_t i = 0; 0x14000 > i; ++i) { ASSERT_EQ(0, referenceSequence.getData()[i]) << "i: " << i; - ASSERT_EQ(0, referenceSequence.getBase(2*i)) << "i: " << i; - ASSERT_EQ(0, referenceSequence.getBase(2*i + 1)) << "i: " << i; + ASSERT_EQ(0, referenceSequence.getBase(2 * i)) << "i: " << i; + ASSERT_EQ(0, referenceSequence.getBase(2 * i + 1)) << "i: " << i; } // check that all the bases trimmed at the beginning are N - for (size_t i = 0; 9984 > i; ++i) - { + for (size_t i = 0; 9984 > i; ++i) { ASSERT_EQ(15, referenceSequence.getBase(163840 + i)); } - for (size_t i = 0x14000; 0x14008 > i; ++i) - { - ASSERT_EQ(0xFF, globalTestEnvironment->getReferenceData()[i]) << "i: " << i << ": " << (i - 0x14000); - ASSERT_EQ(0xFF, referenceSequence.getData()[i]) << "i: " << i << ": " << (i - 0x14000); - ASSERT_EQ(0xF, referenceSequence.getBase(9984 + 2*i)) << "i: " << i; - ASSERT_EQ(0xF, referenceSequence.getBase(9984 + 2*i + 1)) << "i: " << i; + for (size_t i = 0x14000; 0x14008 > i; ++i) { + ASSERT_EQ(0xFF, globalTestEnvironment->getReferenceData()[i]) + << "i: " << i << ": " << (i - 0x14000); + ASSERT_EQ(0xFF, referenceSequence.getData()[i]) + << "i: " << i << ": " << (i - 0x14000); + ASSERT_EQ(0xF, referenceSequence.getBase(9984 + 2 * i)) << "i: " << i; + ASSERT_EQ(0xF, referenceSequence.getBase(9984 + 2 * i + 1)) << "i: " << i; } ASSERT_EQ(0x18, referenceSequence.getData()[0x14000 + 8]); ASSERT_EQ(0x8, referenceSequence.getBase(163840 + 9984 + 8 * 2)); @@ -580,146 +602,177 @@ TEST_F(HashtableFixture, CheckReferenceBasesHashtableV8) ASSERT_EQ(0x1, referenceSequence.getBase(163840 + 9984 + 9 * 2)); ASSERT_EQ(0x2, referenceSequence.getBase(163840 + 9984 + 9 * 2 + 1)); - // The first primary seed that has a forward hit is at position 164015 = 163840 + 175 - // Generate the seeds from a few bases before to a few bases after and hash them - // start at offset ((175/4) * 4)==172 and do 32 primary seeds + // The first primary seed that has a forward hit is at position 164015 = + // 163840 + 175 Generate the seeds from a few bases before to a few bases + // after and hash them start at offset ((175/4) * 4)==172 and do 32 primary + // seeds const unsigned seedLength = hashtableConfig->getPrimarySeedBases(); ASSERT_EQ(21u, seedLength); const uint64_t seedMask = (((uint64_t)1) << (seedLength * 2)) - 1; // check that the mask has 2 * seedLength LSB set - for (unsigned i = 0; 2 * seedLength > i; ++i) - { + for (unsigned i = 0; 2 * seedLength > i; ++i) { ASSERT_EQ(1u, (seedMask >> i) & 1) << "i: " << i; } // check that the mask bits after that al 0 ASSERT_EQ(0u, seedMask >> (2 * seedLength)); const unsigned newBaseShift = 2 * seedLength - 2; using dragenos::sequences::CrcPolynomial; - CrcPolynomial priCrcPolynomial(hashtableConfig->getPrimaryCrcBits(), hashtableConfig->getPriCrcPoly()); - ASSERT_TRUE(priCrcPolynomial == "2C991CE6A8DD55"); // the value from hash_table.cfg + CrcPolynomial priCrcPolynomial(hashtableConfig->getPrimaryCrcBits(), + hashtableConfig->getPriCrcPoly()); + ASSERT_TRUE(priCrcPolynomial == + "2C991CE6A8DD55"); // the value from hash_table.cfg ASSERT_EQ(54u, hashtableConfig->getPrimaryCrcBits()); // void* crcHash64Init(int bits, void const* poly); const uint64_t poly = 0x2C991CE6A8DD55; - uint64_t *init = reinterpret_cast(crcHash64Init(54, &poly)); + uint64_t *init = reinterpret_cast(crcHash64Init(54, &poly)); ASSERT_EQ(encoded2bpb.size(), 64u); const size_t begin = 175 / 4; ASSERT_GT(64u, begin + 8 + 5); // enough space to get 32 deeds of 21 bases - uint64_t seedValue = (*reinterpret_cast(encoded2bpb.data() + begin)) & seedMask; - const uint8_t *bytePtr = encoded2bpb.data() + begin + 5; // pointer to the byte that contains the next bases + uint64_t seedValue = + (*reinterpret_cast(encoded2bpb.data() + begin)) & + seedMask; + const uint8_t *bytePtr = + encoded2bpb.data() + begin + + 5; // pointer to the byte that contains the next bases uint8_t byte = (*bytePtr) >> 2; - //std::cerr << "\n ------------ printing hash values for region " << (163840 + 4*begin) << " to " << (163840 + 4*begin + 32) << std::endl; + // std::cerr << "\n ------------ printing hash values for region " << + // (163840 + 4*begin) << " to " << (163840 + 4*begin + 32) << std::endl; const unsigned table_size_64ths = 53; - ASSERT_EQ(table_size_64ths, hashtableConfig->getTableSize64Ths()); - for (size_t i = 0; 32 > i; ++i) - { + ASSERT_EQ(table_size_64ths, hashtableConfig->getTableSize64Ths()); + for (size_t i = 0; 32 > i; ++i) { // verify that the seed value matches the reference sequence auto tmp = seedValue; - for (size_t j = 0; seedLength > j; ++j) - { -// TODO FIX the introduction of the new bases - ASSERT_EQ(1U << (tmp & 3), referenceSequence.getBase(163840 + 9984 + 4*begin + i + j)) << "i: " << i << ", j: " << j; + for (size_t j = 0; seedLength > j; ++j) { + // TODO FIX the introduction of the new bases + ASSERT_EQ(1U << (tmp & 3), + referenceSequence.getBase(163840 + 9984 + 4 * begin + i + j)) + << "i: " << i << ", j: " << j; tmp = (tmp >> 2); } uint64_t hashValue; - crcHash64(init, reinterpret_cast(&seedValue), &hashValue); + crcHash64(init, reinterpret_cast(&seedValue), &hashValue); // get the same value from the hashtable const auto primaryHasher = hashtable->getPrimaryHasher(); const uint64_t hashValueFromHashtable = primaryHasher->getHash64(seedValue); ASSERT_EQ(hashValue, hashValueFromHashtable); - //std::cerr << "Seed Value : Hash Value : " << std::hex << seedValue << " " << hashValueFromHashtable << std::dec << std::endl; - // print the hashValue, virtual address, bucket and hash bits - //const uint64_t addressBits = dragenos::common::bits::getBits<19, 35>(hashValue); - //const uint64_t virtualAddress = (addressBits * table_size_64ths) / 64; - //const uint64_t bucket = virtualAddress >> 6; - //const uint64_t hashBits = dragenos::common::bits::getBits<0, 23>(hashValue); - //std::cerr << (163840 + 4*begin + i) << ": " - // << std::hex << std::setfill('0') << std::setw(16) << hashValue << std::setfill(' ') << ": " - // << std::hex << std::setfill('0') << std::setw(11) << virtualAddress << std::setfill(' ') << ": " - // << std::hex << std::setfill('0') << std::setw(9) << bucket<< std::setfill(' ') << ": " - // << std::hex << std::setfill('0') << std::setw(7) << hashBits<< std::setfill(' ') << ": " - // << std::dec << std::endl; - // move to the next seed + // std::cerr << "Seed Value : Hash Value : " << std::hex << seedValue << " " + // << hashValueFromHashtable << std::dec << std::endl; + // print the hashValue, virtual address, bucket and hash bits + // const uint64_t addressBits = dragenos::common::bits::getBits<19, + // 35>(hashValue); const uint64_t virtualAddress = (addressBits * + // table_size_64ths) / 64; const uint64_t bucket = virtualAddress >> 6; + // const uint64_t hashBits = dragenos::common::bits::getBits<0, + // 23>(hashValue); std::cerr << (163840 + 4*begin + i) << ": " + // << std::hex << std::setfill('0') << std::setw(16) << hashValue + // << std::setfill(' ') << ": " + // << std::hex << std::setfill('0') << std::setw(11) << + // virtualAddress << std::setfill(' ') << ": " + // << std::hex << std::setfill('0') << std::setw(9) << bucket<< + // std::setfill(' ') << ": " + // << std::hex << std::setfill('0') << std::setw(7) << hashBits<< + // std::setfill(' ') << ": " + // << std::dec << std::endl; + // move to the next seed seedValue = seedValue >> 2; uint64_t newBase = (byte & 3); - if (2 == (i % 4)) - { + if (2 == (i % 4)) { ++bytePtr; byte = *bytePtr; - } - else - { + } else { byte = (byte >> 2); } seedValue |= (newBase << newBaseShift); } } -//TEST_F(HashtableFixture, CheckReferenceBasesHashtableV7) -TEST_F(HashtableFixture, DISABLED_CheckReferenceBasesHashtableV7) -{ +// TEST_F(HashtableFixture, CheckReferenceBasesHashtableV7) +TEST_F(HashtableFixture, DISABLED_CheckReferenceBasesHashtableV7) { using dragenos::sequences::Read; - //ChrM starts at position 163840 - // i: 2461650632, position: 163840, bucket: 125739d9, record: 20f75d8a 08 1eebb1 0 1 0 - std::cerr << "\n Expected seed at start of ChrM: 2461650632, position: 163840, bucket: 125739d9, record: 20f75d8a 08 1eebb1 0 1 0" << std::endl; + // ChrM starts at position 163840 + // i: 2461650632, position: 163840, bucket: 125739d9, record: 20f75d8a 08 + // 1eebb1 0 1 0 + std::cerr << "\n Expected seed at start of ChrM: 2461650632, position: " + "163840, bucket: 125739d9, record: 20f75d8a 08 1eebb1 0 1 0" + << std::endl; size_t position = 163840; - // 00014000 14 28 21 41 84 82 81 12 22 82 81 18 21 12 82 12 + // 00014000 14 28 21 41 84 82 81 12 22 82 81 18 21 12 82 12 // 00014010 42 44 41 82 82 22 81 24 81 88 44 18 88 88 42 28 - const std::vector sequence {1,4, 2,8, 2,1, 4,1, 8,4, 8,2, 8,1, 1,2, 2,2, 8,2, 8,1, 1,8, 2,1, 1,2, 8,2, 1,2, - 4,2, 4,4, 4,1, 8,2, 8,2, 2,2, 8,1, 2,4, 8,1, 8,8, 4,4, 1,8, 8,8, 8,8, 4,2, 2,8 }; + const std::vector sequence{ + 1, 4, 2, 8, 2, 1, 4, 1, 8, 4, 8, 2, 8, 1, 1, 2, 2, 2, 8, 2, 8, 1, + 1, 8, 2, 1, 1, 2, 8, 2, 1, 2, 4, 2, 4, 4, 4, 1, 8, 2, 8, 2, 2, 2, + 8, 1, 2, 4, 8, 1, 8, 8, 4, 4, 1, 8, 8, 8, 8, 8, 4, 2, 2, 8}; ASSERT_EQ(64u, sequence.size()); - for(auto s: sequence) - { + for (auto s : sequence) { ASSERT_EQ(s, referenceSequence.getBase(position)); ++position; } - auto translate = [] (unsigned char c) -> char - { + auto translate = [](unsigned char c) -> char { switch (c) { - case 1: return 'A'; - case 2: return 'C'; - case 4: return 'G'; - case 8: return 'T'; - default: return 'N'; + case 1: + return 'A'; + case 2: + return 'C'; + case 4: + return 'G'; + case 8: + return 'T'; + default: + return 'N'; } }; TestRead r; - for(auto s: sequence) - { + for (auto s : sequence) { r.bases_.push_back(translate(s)); } r.qualities_.resize(r.bases_.size(), 'B'); ASSERT_EQ(r.bases_.size(), sequence.size()); ASSERT_EQ(r.qualities_.size(), sequence.size()); std::vector refSeq = generateRefSeq(r.bases_); - //uint64_t *init = (uint64_t*)crcHash64Init(hashtableConfig->getPrimaryCrcBits(), hashtableConfig->getPriCrcPoly()); - uint64_t *init = (uint64_t*)crcHash64Init(42, hashtableConfig->getPriCrcPoly()); + // uint64_t *init = + // (uint64_t*)crcHash64Init(hashtableConfig->getPrimaryCrcBits(), + // hashtableConfig->getPriCrcPoly()); + uint64_t *init = + (uint64_t *)crcHash64Init(42, hashtableConfig->getPriCrcPoly()); auto data = refSeq; - //constexpr unsigned seedLen = 21; - //const uint64_t priSeedMask = ((uint64_t)1 << (seedLen * 2)) - 1; + // constexpr unsigned seedLen = 21; + // const uint64_t priSeedMask = ((uint64_t)1 << (seedLen * 2)) - 1; // TODO: sort out the reference bin // - //uint64_t refBinMask = binBits ? ((0ULL - 1) << binBits) : 0; - //hashKey |= (pos & refBinMask) << KEY_ANCHOR_OFFSET; + // uint64_t refBinMask = binBits ? ((0ULL - 1) << binBits) : 0; + // hashKey |= (pos & refBinMask) << KEY_ANCHOR_OFFSET; uint64_t hash1; crcHash64(init, data.data(), &hash1); auto virtualByteAddress1 = dragenos::common::bits::getBits<19, 35>(hash1); - std::cerr << "hash with all bases: " << std::hex << hash1 << " : " << dragenos::common::bits::getBits<0, 23>(hash1) << std::dec << std::endl; - std::cerr << "Virtual address from hashtable: " << std::hex << virtualByteAddress1 << std::dec << std::endl; - std::cerr << "Bucket index from hashtable: " << std::hex << hashtable->getBucketIndex(virtualByteAddress1) << std::dec << std::endl; - std::cerr << "thread Id from hashtable: " << std::hex << hashtable->getThreadIdFromVirtualByteAddress(virtualByteAddress1) << std::dec << std::endl; + std::cerr << "hash with all bases: " << std::hex << hash1 << " : " + << dragenos::common::bits::getBits<0, 23>(hash1) << std::dec + << std::endl; + std::cerr << "Virtual address from hashtable: " << std::hex + << virtualByteAddress1 << std::dec << std::endl; + std::cerr << "Bucket index from hashtable: " << std::hex + << hashtable->getBucketIndex(virtualByteAddress1) << std::dec + << std::endl; + std::cerr << "thread Id from hashtable: " << std::hex + << hashtable->getThreadIdFromVirtualByteAddress(virtualByteAddress1) + << std::dec << std::endl; data[5] &= 3; data[6] = 0; data[7] = 0; crcHash64(init, data.data(), &hash1); virtualByteAddress1 = dragenos::common::bits::getBits<19, 35>(hash1); - std::cerr << "hash with 21 bases: " << std::hex << hash1 << " : " << dragenos::common::bits::getBits<0, 23>(hash1) << std::dec << std::endl; - std::cerr << "Virtual address from hashtable: " << std::hex << virtualByteAddress1 << std::dec << std::endl; - std::cerr << "Bucket index from hashtable: " << std::hex << hashtable->getBucketIndex(virtualByteAddress1) << std::dec << std::endl; - std::cerr << "thread Id from hashtable: " << std::hex << hashtable->getThreadIdFromVirtualByteAddress(virtualByteAddress1) << std::dec << std::endl; - + std::cerr << "hash with 21 bases: " << std::hex << hash1 << " : " + << dragenos::common::bits::getBits<0, 23>(hash1) << std::dec + << std::endl; + std::cerr << "Virtual address from hashtable: " << std::hex + << virtualByteAddress1 << std::dec << std::endl; + std::cerr << "Bucket index from hashtable: " << std::hex + << hashtable->getBucketIndex(virtualByteAddress1) << std::dec + << std::endl; + std::cerr << "thread Id from hashtable: " << std::hex + << hashtable->getThreadIdFromVirtualByteAddress(virtualByteAddress1) + << std::dec << std::endl; free(init); init = NULL; @@ -727,27 +780,43 @@ TEST_F(HashtableFixture, DISABLED_CheckReferenceBasesHashtableV7) r.name_ = "dummy"; Read read; read.init(r.name(), r.bases(), r.qualities(), 0, 0); - std::cerr << "\n Read: "; printBases(std::cerr, read); std::cerr << std::endl; - const dragenos::sequences::Seed seed(&read, 0U, hashtableConfig->getPrimarySeedBases()); + std::cerr << "\n Read: "; + printBases(std::cerr, read); + std::cerr << std::endl; + const dragenos::sequences::Seed seed(&read, 0U, + hashtableConfig->getPrimarySeedBases()); const auto hashtableConfig = Environment::getHashtableConfig(); - using dragenos::sequences::CrcPolynomial; using dragenos::sequences::CrcHasher; - CrcPolynomial priCrcPolynomial(hashtableConfig->getPrimaryCrcBits(), hashtableConfig->getPriCrcPoly()); - ASSERT_TRUE(priCrcPolynomial == "2C991CE6A8DD55"); // the value from hash_table.cfg - //CrcPolynomial secCrcPolynomial(hashtableConfig->getSecCrcPoly(), hashtableConfig->getSecondaryCrcBits()); - //ASSERT_TRUE(secCrcPolynomial == "1524CA66E8D39"); // the value from hash_table.cfg + using dragenos::sequences::CrcPolynomial; + CrcPolynomial priCrcPolynomial(hashtableConfig->getPrimaryCrcBits(), + hashtableConfig->getPriCrcPoly()); + ASSERT_TRUE(priCrcPolynomial == + "2C991CE6A8DD55"); // the value from hash_table.cfg + // CrcPolynomial secCrcPolynomial(hashtableConfig->getSecCrcPoly(), + // hashtableConfig->getSecondaryCrcBits()); ASSERT_TRUE(secCrcPolynomial == + // "1524CA66E8D39"); // the value from hash_table.cfg CrcHasher priCrcHasher(priCrcPolynomial); - //CrcHasher secCrcHasher(hashtableConfig->getSecondaryCrcBits(), secCrcPolynomial); + // CrcHasher secCrcHasher(hashtableConfig->getSecondaryCrcBits(), + // secCrcPolynomial); const auto seedData = seed.getPrimaryData(false); const auto hash = priCrcHasher.getHash64(seedData); - std::cerr << "Hash value from crc hasher: " << std::hex << hash << std::dec << std::endl; - std::cerr << "Hash value from hashtable: " << std::hex << hashtable->getPrimaryHasher()->getHash64(seedData) << std::dec << std::endl; - std::cerr << "expected Virtual address: " << std::hex << (hash * 60 / 64) << std::dec << std::endl; + std::cerr << "Hash value from crc hasher: " << std::hex << hash << std::dec + << std::endl; + std::cerr << "Hash value from hashtable: " << std::hex + << hashtable->getPrimaryHasher()->getHash64(seedData) << std::dec + << std::endl; + std::cerr << "expected Virtual address: " << std::hex << (hash * 60 / 64) + << std::dec << std::endl; const auto virtualByteAddress = hashtable->getVirtualByteAddress(hash); - std::cerr << "Virtual address from hashtable: " << std::hex << virtualByteAddress << std::dec << std::endl; - std::cerr << "Bucket index from hashtable: " << std::hex << hashtable->getBucketIndex(virtualByteAddress) << std::dec << std::endl; - std::cerr << "thread Id from hashtable: " << std::hex << hashtable->getThreadIdFromVirtualByteAddress(virtualByteAddress) << std::dec << std::endl; + std::cerr << "Virtual address from hashtable: " << std::hex + << virtualByteAddress << std::dec << std::endl; + std::cerr << "Bucket index from hashtable: " << std::hex + << hashtable->getBucketIndex(virtualByteAddress) << std::dec + << std::endl; + std::cerr << "thread Id from hashtable: " << std::hex + << hashtable->getThreadIdFromVirtualByteAddress(virtualByteAddress) + << std::dec << std::endl; #if 0 typedef dragenos::reference::HashRecord HashRecord; // TODO: add some tests for getHits @@ -773,29 +842,30 @@ TEST_F(HashtableFixture, DISABLED_CheckReferenceBasesHashtableV7) #endif } -TEST_F(HashtableFixture, ExploreExtend) -{ +TEST_F(HashtableFixture, ExploreExtend) { using dragenos::reference::Bucket; using dragenos::reference::HashRecord; const auto table = globalTestEnvironment->table; - const auto bucketCount = hashtableConfig->getHashtableBytes() / sizeof(Bucket); - auto buckets = reinterpret_cast(table); + const auto bucketCount = + hashtableConfig->getHashtableBytes() / sizeof(Bucket); + auto buckets = reinterpret_cast(table); bool found = false; size_t count = 0; uint64_t maxExtensionLength = 0; - for (size_t i = 0; i < bucketCount && !found; ++i) - { + for (size_t i = 0; i < bucketCount && !found; ++i) { const auto &bucket = buckets[i]; - for (const auto &hashRecord: bucket) - { - //if (HashRecord::EXTEND == bucket[0].getType()) - if (HashRecord::EXTEND == hashRecord.getType()) - { + for (const auto &hashRecord : bucket) { + // if (HashRecord::EXTEND == bucket[0].getType()) + if (HashRecord::EXTEND == hashRecord.getType()) { ++count; - maxExtensionLength = std::max(maxExtensionLength, hashRecord.getExtensionLength()); - if (1 == (hashRecord.getExtensionLength() % 2)) // as the extension length is supposed to be the total it should be even + maxExtensionLength = + std::max(maxExtensionLength, hashRecord.getExtensionLength()); + if (1 == (hashRecord.getExtensionLength() % + 2)) // as the extension length is supposed to be the total it + // should be even { - std::cerr << " QQQQQQQQQQQQ: " << (unsigned)hashRecord.getExtensionLength() << std::endl; + std::cerr << " QQQQQQQQQQQQ: " + << (unsigned)hashRecord.getExtensionLength() << std::endl; } #if 0 std::cerr << "\n ------------\ni: " << i << std::endl; @@ -821,47 +891,45 @@ TEST_F(HashtableFixture, ExploreExtend) std::cerr << std::endl; } #endif - } } } - std::cerr << "\n ------------------- \n Found " << count << " EXTEND records in " << bucketCount << " buckets. Max Extension Length: " << (unsigned)maxExtensionLength << std::endl; - //for (const auto p: {787527575UL, 2796142304UL, 213253491UL, 1333620284UL}) + std::cerr << "\n ------------------- \n Found " << count + << " EXTEND records in " << bucketCount + << " buckets. Max Extension Length: " + << (unsigned)maxExtensionLength << std::endl; + // for (const auto p: {787527575UL, 2796142304UL, 213253491UL, 1333620284UL}) //{ - + //} - //const auto table = Environment::table; + // const auto table = Environment::table; } -TEST_F(HashtableFixture, DISABLED_ExploreHits) -{ +TEST_F(HashtableFixture, DISABLED_ExploreHits) { using dragenos::reference::Bucket; const auto table = globalTestEnvironment->table; - auto buckets = reinterpret_cast(table); + auto buckets = reinterpret_cast(table); bool found = false; - for (size_t i = 0; i < 10000000 && !found; ++i) - { + for (size_t i = 0; i < 10000000 && !found; ++i) { const auto &bucket = buckets[i]; - //for (const auto &hashRecord: bucket) + // for (const auto &hashRecord: bucket) { - if (bucket[0].isHit()) - { + if (bucket[0].isHit()) { std::cerr << "\n ------------\ni: " << i << std::endl; - for (const auto &hashRecord: bucket) - { + for (const auto &hashRecord : bucket) { std::cerr << " type: " << hashRecord.getType(); - if (hashRecord.isHit()) - { - std::cerr << " value: " << std::hex << hashRecord.getValue() << " " << std::oct << hashRecord.getValue() << std::hex << " "; - std::cerr << "thread Id: " << (unsigned)hashRecord.getThreadId() << " "; + if (hashRecord.isHit()) { + std::cerr << " value: " << std::hex << hashRecord.getValue() << " " + << std::oct << hashRecord.getValue() << std::hex << " "; + std::cerr << "thread Id: " << (unsigned)hashRecord.getThreadId() + << " "; std::cerr << "hash bits: " << hashRecord.getHashBits() << " "; std::cerr << hashRecord.isExtendedSeed() << " "; std::cerr << hashRecord.isLastInThread() << " "; std::cerr << hashRecord.isReverseComplement() << " "; const size_t position = hashRecord.getPosition(); std::cerr << "position: " << std::dec << position << std::endl; - for (unsigned i = position -5 ; i < position + 25; ++i) - { + for (unsigned i = position - 5; i < position + 25; ++i) { std::cerr << (unsigned int)referenceSequence.getBase(i) << " "; } found |= (!hashRecord.isReverseComplement()); @@ -871,15 +939,14 @@ TEST_F(HashtableFixture, DISABLED_ExploreHits) } } } - //for (const auto p: {787527575UL, 2796142304UL, 213253491UL, 1333620284UL}) + // for (const auto p: {787527575UL, 2796142304UL, 213253491UL, 1333620284UL}) //{ - + //} - //const auto table = Environment::table; + // const auto table = Environment::table; } -TEST_F(HashtableFixture, DISABLED_Config) -{ +TEST_F(HashtableFixture, DISABLED_Config) { ASSERT_NE(nullptr, hashtableConfig); const auto &buffer = Environment::getHashtableConfigText(); std::istringstream is(std::string(buffer.data(), buffer.size())); @@ -893,29 +960,39 @@ TEST_F(HashtableFixture, DISABLED_Config) getline(is, line); // 5th line is "# Do not modify. getline(is, line); // 6th line is empty std::vector> keyValues; - while (is && getline(is, line)) - { + while (is && getline(is, line)) { keyValues.push_back(keyAndValue(line)); } - const std::map keyValuesMap(keyValues.begin(), keyValues.end()); - ASSERT_EQ(keyValuesMap.size(), keyValues.size()); // check that all keys are unique - ASSERT_EQ((unsigned long)stol(keyValuesMap.at("reference_len_raw")), hashtableConfig->getReferenceLength()); - ASSERT_EQ((unsigned)stoi(keyValuesMap.at("reference_sequences")), hashtableConfig->getNumberOfSequences()); - ASSERT_LT(5 * hashtableConfig->getNumberOfSequences(), keyValues.size()); // at least 5 entries per sequence - for(unsigned i = 0; hashtableConfig->getNumberOfSequences() > i; ++i) - { + const std::map keyValuesMap(keyValues.begin(), + keyValues.end()); + ASSERT_EQ(keyValuesMap.size(), + keyValues.size()); // check that all keys are unique + ASSERT_EQ((unsigned long)stol(keyValuesMap.at("reference_len_raw")), + hashtableConfig->getReferenceLength()); + ASSERT_EQ((unsigned)stoi(keyValuesMap.at("reference_sequences")), + hashtableConfig->getNumberOfSequences()); + ASSERT_LT(5 * hashtableConfig->getNumberOfSequences(), + keyValues.size()); // at least 5 entries per sequence + for (unsigned i = 0; hashtableConfig->getNumberOfSequences() > i; ++i) { const std::string indexString = std::to_string(i); const auto numberOfSequences = hashtableConfig->getNumberOfSequences(); const auto numberOfKeys = keyValues.size(); const size_t index = numberOfKeys - 5 * (numberOfSequences - i); // check that we have the expected keys in the text config file - ASSERT_EQ(std::string("reference_sequence") + indexString, keyValues[index].first); - ASSERT_EQ(std::string("reference_start") + indexString, keyValues[index + 1].first); - ASSERT_EQ(std::string("reference_beg_trim") + indexString, keyValues[index+ 2 ].first); - ASSERT_EQ(std::string("reference_end_trim") + indexString, keyValues[index + 3].first); - ASSERT_EQ(std::string("reference_len") + indexString, keyValues[index + 4].first); - // check that the hashtable config values match the values in the text file for this sequence - const std::string expectedName = keyValues[index].second.substr(1, keyValues[index].second.size() - 2); // enclosed in '' + ASSERT_EQ(std::string("reference_sequence") + indexString, + keyValues[index].first); + ASSERT_EQ(std::string("reference_start") + indexString, + keyValues[index + 1].first); + ASSERT_EQ(std::string("reference_beg_trim") + indexString, + keyValues[index + 2].first); + ASSERT_EQ(std::string("reference_end_trim") + indexString, + keyValues[index + 3].first); + ASSERT_EQ(std::string("reference_len") + indexString, + keyValues[index + 4].first); + // check that the hashtable config values match the values in the text file + // for this sequence + const std::string expectedName = keyValues[index].second.substr( + 1, keyValues[index].second.size() - 2); // enclosed in '' ASSERT_EQ(expectedName, hashtableConfig->getSequenceName(i)); const auto &sequence = hashtableConfig->getSequences()[i]; ASSERT_EQ(keyValues[index + 1].second, std::to_string(sequence.seqStart)); @@ -925,28 +1002,30 @@ TEST_F(HashtableFixture, DISABLED_Config) } } -TEST_F(HashtableFixture, HashtableContent) -{ - ASSERT_NE(nullptr, hashtable); -} +TEST_F(HashtableFixture, HashtableContent) { ASSERT_NE(nullptr, hashtable); } -TEST_F(HashtableFixture, DISABLED_HashSeed) -{ +TEST_F(HashtableFixture, DISABLED_HashSeed) { using namespace dragenos::sequences; const CrcHasher primaryHasher(hashtableConfig->getPrimaryPolynomial()); const CrcHasher secondaryHasher(hashtableConfig->getSecondaryPolynomial()); - TestRead tr = {"@dummy", "ACGTAACCGGTTAAACCCGGGTTTAAAACCCCGGGGTTTT", "BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB"}; + TestRead tr = {"@dummy", "ACGTAACCGGTTAAACCCGGGTTTAAAACCCCGGGGTTTT", + "BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB"}; dragenos::sequences::Read read; read.init(tr.name(), tr.bases(), tr.qualities(), 0, 0); ASSERT_LT(hashtableConfig->getPrimarySeedBases(), read.getLength()); - const dragenos::sequences::Seed primarySeed(&read, 0U, hashtableConfig->getPrimarySeedBases()); - const dragenos::sequences::Seed extendedSeed(&read, 7U, hashtableConfig->getPrimarySeedBases()); - ASSERT_EQ(primaryHasher.getHash64(primarySeed.getPrimaryData(false)), hashtable->getPrimaryHasher()->getHash64(primarySeed.getPrimaryData(false))); - ASSERT_EQ(secondaryHasher.getHash64(extendedSeed.getPrimaryData(false)), hashtable->getSecondaryHasher()->getHash64(extendedSeed.getPrimaryData(false))); + const dragenos::sequences::Seed primarySeed( + &read, 0U, hashtableConfig->getPrimarySeedBases()); + const dragenos::sequences::Seed extendedSeed( + &read, 7U, hashtableConfig->getPrimarySeedBases()); + ASSERT_EQ(primaryHasher.getHash64(primarySeed.getPrimaryData(false)), + hashtable->getPrimaryHasher()->getHash64( + primarySeed.getPrimaryData(false))); + ASSERT_EQ(secondaryHasher.getHash64(extendedSeed.getPrimaryData(false)), + hashtable->getSecondaryHasher()->getHash64( + extendedSeed.getPrimaryData(false))); } -TEST_F(HashtableFixture, DISABLED_GetExtendRecord) -{ +TEST_F(HashtableFixture, DISABLED_GetExtendRecord) { #if 0 using namespace dragenos::sequences; const CrcHasher primaryHasher(hashtableConfig->getPrimaryPolynomial()); @@ -987,9 +1066,8 @@ TEST_F(HashtableFixture, DISABLED_GetExtendRecord) #endif } -TEST_F(HashtableFixture, DoItAll) -{ - //ASSERT_TRUE(false); +TEST_F(HashtableFixture, DoItAll) { + // ASSERT_TRUE(false); #if 0 Mapper mapper; mapper.load(directory); @@ -1012,106 +1090,125 @@ TEST_F(HashtableFixture, DoItAll) } std::vector Environment::hashtableConfigText; -std::unique_ptr Environment::hashtableConfig(nullptr); +std::unique_ptr + Environment::hashtableConfig(nullptr); std::unique_ptr Environment::hashtable(nullptr); unsigned char *Environment::referenceData = nullptr; size_t Environment::referenceFileSize = 0; -std::vector slurp(const boost::filesystem::path &filePath) -{ +std::vector slurp(const boost::filesystem::path &filePath) { const ssize_t fileSize = file_size(filePath); - if (0 == fileSize) - { + if (0 == fileSize) { return std::vector(); } std::vector ret; ret.resize(fileSize); std::ifstream is(filePath.string()); - if (is && is.read(ret.data(), fileSize) && (fileSize == is.gcount())) - { + if (is && is.read(ret.data(), fileSize) && (fileSize == is.gcount())) { return ret; } - BOOST_THROW_EXCEPTION(dragenos::common::IoException(errno, "Failed to read file")); + BOOST_THROW_EXCEPTION( + dragenos::common::IoException(errno, "Failed to read file")); } -void Environment::SetUp() -{ +void Environment::SetUp() { const auto argv = testing::internal::GetArgvs(); namespace bfs = boost::filesystem; - ASSERT_TRUE(argv.size() > 1 || (nullptr != getenv("REFDIR"))) << "Checking for reference-directory on the command line or in the environment variable REFDIR"; + ASSERT_TRUE(argv.size() > 1 || (nullptr != getenv("REFDIR"))) + << "Checking for reference-directory on the command line or in the " + "environment variable REFDIR"; const bfs::path referenceDir(argv.size() > 1 ? argv[1] : getenv("REFDIR")); - std::cerr << "\n" << argv[0] << ": using rederence directory: " << referenceDir << "\n" << std::endl; - //const bfs::path readsPath(argv[2]); - //const bfs::path mappingsPath(argv[3]); - ASSERT_TRUE(exists(referenceDir)) << "checking the existence of the reference-directory: " << referenceDir; + std::cerr << "\n" + << argv[0] << ": using rederence directory: " << referenceDir + << "\n" + << std::endl; + // const bfs::path readsPath(argv[2]); + // const bfs::path mappingsPath(argv[3]); + ASSERT_TRUE(exists(referenceDir)) + << "checking the existence of the reference-directory: " << referenceDir; const bfs::path hashtableConfigTextFile = referenceDir / "hash_table.cfg"; - ASSERT_TRUE(exists(hashtableConfigTextFile)) << "checking the existence of the hashtable config (text): " << hashtableConfigTextFile; + ASSERT_TRUE(exists(hashtableConfigTextFile)) + << "checking the existence of the hashtable config (text): " + << hashtableConfigTextFile; hashtableConfigText = slurp(hashtableConfigTextFile); const bfs::path hashtableConfigFile = referenceDir / "hash_table.cfg.bin"; - ASSERT_TRUE(exists(hashtableConfigFile)) << "checking the existence of the hashtable config: " << hashtableConfigFile; + ASSERT_TRUE(exists(hashtableConfigFile)) + << "checking the existence of the hashtable config: " + << hashtableConfigFile; const std::vector config = slurp(hashtableConfigFile); hashtableConfig.reset(new HashtableConfig(config.data(), config.size())); const bfs::path hashtableFile = referenceDir / "hash_table.bin"; const bfs::path extendTableFile = referenceDir / "extend_table.bin"; - ASSERT_TRUE(exists(hashtableFile)) << "checking the existence of the uncompressed hashtable: " << hashtableFile; + ASSERT_TRUE(exists(hashtableFile)) + << "checking the existence of the uncompressed hashtable: " + << hashtableFile; hashtableFd = open(hashtableFile.c_str(), O_RDONLY, 0); - ASSERT_LT(-1, hashtableFd) << "failed to open hashtable: " << hashtableFile << ": " << strerror(errno); + ASSERT_LT(-1, hashtableFd) << "failed to open hashtable: " << hashtableFile + << ": " << strerror(errno); const auto tableSize = boost::filesystem::file_size(hashtableFile); ASSERT_EQ(tableSize, hashtableConfig->getHashtableBytes()); const int prot = PROT_READ; const int flags = MAP_PRIVATE | MAP_NORESERVE; const int offset = 0; - table = static_cast (mmap(NULL, tableSize, prot, flags, hashtableFd, offset)); - ASSERT_NE(MAP_FAILED, table) << "failed to map hashtable file: " << strerror(errno); - if (8 <= hashtableConfig.get()->getHashtableVersion()) - { + table = static_cast( + mmap(NULL, tableSize, prot, flags, hashtableFd, offset)); + ASSERT_NE(MAP_FAILED, table) + << "failed to map hashtable file: " << strerror(errno); + if (8 <= hashtableConfig.get()->getHashtableVersion()) { extendTableFd = open(extendTableFile.c_str(), O_RDONLY, 0); - ASSERT_LT(-1, extendTableFd) << "failed to open extendTable: " << extendTableFile << ": " << strerror(errno); + ASSERT_LT(-1, extendTableFd) + << "failed to open extendTable: " << extendTableFile << ": " + << strerror(errno); const auto extendTableSize = boost::filesystem::file_size(extendTableFile); ASSERT_EQ(extendTableSize, hashtableConfig->getExtendTableBytes()); - extendTable = static_cast (mmap(NULL, extendTableSize, prot, flags, extendTableFd, offset)); - ASSERT_NE(MAP_FAILED, extendTable) << "failed to map extendTable file: " << strerror(errno); + extendTable = static_cast( + mmap(NULL, extendTableSize, prot, flags, extendTableFd, offset)); + ASSERT_NE(MAP_FAILED, extendTable) + << "failed to map extendTable file: " << strerror(errno); } - // As Hashtable can't be initialized after construction, it has to be built dynamically + // As Hashtable can't be initialized after construction, it has to be built + // dynamically hashtable.reset(new Hashtable(hashtableConfig.get(), table, extendTable)); - // We need the actual reference to check that the result of the queries is consistent with the actual reference + // We need the actual reference to check that the result of the queries is + // consistent with the actual reference const bfs::path referenceFile = referenceDir / "reference.bin"; - ASSERT_TRUE(exists(referenceFile)) << "checking the existence of the packed reference sequence: " << referenceFile; + ASSERT_TRUE(exists(referenceFile)) + << "checking the existence of the packed reference sequence: " + << referenceFile; referenceFileSize = file_size(referenceFile); referenceFd = open(referenceFile.c_str(), O_RDONLY, 0); - ASSERT_NE(-1, referenceFd) << "failed to open reference file " << referenceFile << ": " << strerror(errno); - referenceData = static_cast(mmap(NULL, referenceFileSize, prot, flags, referenceFd, offset)); - ASSERT_NE(MAP_FAILED, referenceData) << "failed to mmap reference file " << referenceFile << ": " << strerror(errno); + ASSERT_NE(-1, referenceFd) << "failed to open reference file " + << referenceFile << ": " << strerror(errno); + referenceData = static_cast( + mmap(NULL, referenceFileSize, prot, flags, referenceFd, offset)); + ASSERT_NE(MAP_FAILED, referenceData) + << "failed to mmap reference file " << referenceFile << ": " + << strerror(errno); } -void Environment::TearDown() -{ - if (-1 != hashtableFd) - { +void Environment::TearDown() { + if (-1 != hashtableFd) { close(hashtableFd); hashtableFd = -1; } - if (-1 != extendTableFd) - { + if (-1 != extendTableFd) { close(extendTableFd); extendTableFd = -1; } - if ((nullptr != table) && (MAP_FAILED != table)) - { - const auto tableSize = hashtableConfig->getHashtableBytes(); + if ((nullptr != table) && (MAP_FAILED != table)) { + const auto tableSize = hashtableConfig->getHashtableBytes(); munmap(table, tableSize); table = nullptr; } - if ((nullptr != extendTable) && (MAP_FAILED != extendTable)) - { + if ((nullptr != extendTable) && (MAP_FAILED != extendTable)) { const auto extendTableSize = hashtableConfig->getExtendTableBytes(); munmap(extendTable, extendTableSize); extendTable = nullptr; } hashtable.reset(nullptr); - if (-1 != referenceFd) close(referenceFd); - if ((nullptr != referenceData) && (MAP_FAILED != referenceData)) - { + if (-1 != referenceFd) + close(referenceFd); + if ((nullptr != referenceData) && (MAP_FAILED != referenceData)) { munmap(referenceData, referenceFileSize); referenceData = nullptr; } @@ -1120,11 +1217,12 @@ void Environment::TearDown() HashtableFixture::HashtableConfig *HashtableFixture::hashtableConfig; HashtableFixture::Hashtable *HashtableFixture::hashtable; HashtableFixture::ReferenceSequence HashtableFixture::referenceSequence; -void HashtableFixture::SetUpTestCase() -{ +void HashtableFixture::SetUpTestCase() { hashtableConfig = Environment::getHashtableConfig(); hashtable = Environment::getHashtable(); - referenceSequence.reset(hashtableConfig->getTrimmedRegions(), Environment::getReferenceData(), Environment::getReferenceSize()); + referenceSequence.reset(hashtableConfig->getTrimmedRegions(), + Environment::getReferenceData(), + Environment::getReferenceSize()); } int main(int argc, char **argv) { @@ -1132,41 +1230,45 @@ int main(int argc, char **argv) { //::testing::FLAGS_gtest_throw_on_failure = true; ::testing::FLAGS_gtest_throw_on_failure = false; globalTestEnvironment = new Environment; - /* ::testing::Environment* const env = */ ::testing::AddGlobalTestEnvironment(globalTestEnvironment); + /* ::testing::Environment* const env = */ ::testing::AddGlobalTestEnvironment( + globalTestEnvironment); return RUN_ALL_TESTS(); delete globalTestEnvironment; } // Hashes a data buffer using a CRC polynomial of the same length, bit-by-bit. -// The same-length requirement makes the hash reversible. No information is lost, -// and different data words are guaranteed to have different hashes. This means -// that a key match can be confirmed by comparing the hash alone, or rather the -// portion not implicitly matched by use as address bits. -void* crcHashSlow(int bits, void const* poly, void const* data, void* hash) -{ +// The same-length requirement makes the hash reversible. No information is +// lost, and different data words are guaranteed to have different hashes. This +// means that a key match can be confirmed by comparing the hash alone, or +// rather the portion not implicitly matched by use as address bits. +void *crcHashSlow(int bits, void const *poly, void const *data, void *hash) { int bytes = (bits + 7) >> 3, topByte = bytes - 1; - int topBitMask = (1 << ((bits + 7) % 8)), topByteMask = ((topBitMask << 1) - 1); + int topBitMask = (1 << ((bits + 7) % 8)), + topByteMask = ((topBitMask << 1) - 1); int i, j, subtract; -#define POLY ((unsigned char*)poly) -#define HASH ((unsigned char*)hash) - // Since data and polynomial are the same length, copy in all the data bytes immediately. - // This data order doesn't match normal CRC computation, which by processing byte zero first, - // effectively treats it as the most significant byte of the dividend. But with odd bit - // lengths, this works better. +#define POLY ((unsigned char *)poly) +#define HASH ((unsigned char *)hash) + // Since data and polynomial are the same length, copy in all the data bytes + // immediately. This data order doesn't match normal CRC computation, which by + // processing byte zero first, effectively treats it as the most significant + // byte of the dividend. But with odd bit lengths, this works better. memcpy(hash, data, bytes); // Loop through the bits for (i = 0; i < bits; i++) { // Plan to subtract the polynomial if the MSB is 1 subtract = (HASH[topByte] & topBitMask); - // Left-shift the remainder (corresponds to right-shifting the polynomial position) - for (j = topByte; j > 0; j--) HASH[j] = (HASH[j] << 1) | (HASH[j - 1] >> 7); + // Left-shift the remainder (corresponds to right-shifting the polynomial + // position) + for (j = topByte; j > 0; j--) + HASH[j] = (HASH[j] << 1) | (HASH[j - 1] >> 7); HASH[0] <<= 1; // Subtract the polynomial if required to cancel the MSB shifted out if (subtract) - for (j = 0; j < bytes; j++) HASH[j] ^= POLY[j]; + for (j = 0; j < bytes; j++) + HASH[j] ^= POLY[j]; } // Mask off unused positions in the top byte HASH[topByte] &= topByteMask; @@ -1177,17 +1279,18 @@ void* crcHashSlow(int bits, void const* poly, void const* data, void* hash) } // Optimized 64-bit version -void* crcHash64Init(int bits, void const* poly) -{ - int bytes = (bits + 7) >> 3, i, j; - int bufQw = (1 + 256 * bytes); - uint64_t *init = (uint64_t*)calloc(8, bufQw), *p = init; - uint64_t data; - - if (!init) return NULL; +void *crcHash64Init(int bits, void const *poly) { + int bytes = (bits + 7) >> 3, i, j; + int bufQw = (1 + 256 * bytes); + uint64_t *init = (uint64_t *)calloc(8, bufQw), *p = init; + uint64_t data; + + if (!init) + return NULL; // Store the byte count in the init buffer *p++ = bytes; - // Store the slow-hash of each byte value 0-255 in each byte position in a data buffer + // Store the slow-hash of each byte value 0-255 in each byte position in a + // data buffer for (i = 0; i < bytes; i++) { for (j = 0; j < 256; j++) { data = (uint64_t)j << (i << 3); @@ -1198,10 +1301,9 @@ void* crcHash64Init(int bits, void const* poly) } // Optimized 64-bit version -void* crcHash64(uint64_t* init, uint8_t const* data, uint64_t* hash) -{ - uint64_t h = 0; - int bytes = *init++; +void *crcHash64(uint64_t *init, uint8_t const *data, uint64_t *hash) { + uint64_t h = 0; + int bytes = *init++; while (bytes--) { h ^= init[*data++]; @@ -1211,33 +1313,37 @@ void* crcHash64(uint64_t* init, uint8_t const* data, uint64_t* hash) return hash; } -uint8_t encodebaseTo2Bits(const char c) -{ - switch (c) - { - case 'A': return 0; break; - case 'C': return 1; break; - case 'G': return 2; break; - case 'T': return 3; break; - default: throw std::invalid_argument(std::string("base must be ACGT: ") + c); +uint8_t encodebaseTo2Bits(const char c) { + switch (c) { + case 'A': + return 0; + break; + case 'C': + return 1; + break; + case 'G': + return 2; + break; + case 'T': + return 3; + break; + default: + throw std::invalid_argument(std::string("base must be ACGT: ") + c); }; } -//std::vector generateRefSeq(std::vector bases) -std::vector generateRefSeq(std::string bases) -{ +// std::vector generateRefSeq(std::vector bases) +std::vector generateRefSeq(std::string bases) { assert(0 == (bases.size() % 4)); std::vector refSeq; refSeq.resize((bases.size() + 3) / 4); uint8_t seqByte = 0; - for (unsigned i = 0; bases.size() > i; ++i) - { + for (unsigned i = 0; bases.size() > i; ++i) { const auto c = bases[i]; const uint8_t encoded = encodebaseTo2Bits(c); seqByte |= ((encoded & 3) << ((i & 3) << 2)); - if (3 == (i & 3)) - { - refSeq[i/4] = seqByte; + if (3 == (i & 3)) { + refSeq[i / 4] = seqByte; seqByte = 0; } } diff --git a/tests/generate.cpp b/tests/generate.cpp index 97efe42..352c357 100644 --- a/tests/generate.cpp +++ b/tests/generate.cpp @@ -1,63 +1,62 @@ -#include #include -#include #include #include +#include +#include #include -int main(int argc, char **argv) -{ - if (2 != argc) - { - std::cerr << "Usage: " << argv[0] << " path/to/fasta/reference/genome.fa" << std::endl; - std::cerr - << "\ngenerate a single fastq from the first reference sequence found in the" - << "\nfasta file. Each read uses 2 lines in the fasta file and skips one line" - << std::endl; - exit (1); +int main(int argc, char **argv) { + if (2 != argc) { + std::cerr << "Usage: " << argv[0] << " path/to/fasta/reference/genome.fa" + << std::endl; + std::cerr << "\ngenerate a single fastq from the first reference sequence " + "found in the" + << "\nfasta file. Each read uses 2 lines in the fasta file and " + "skips one line" + << std::endl; + exit(1); } const boost::filesystem::path fasta = argv[1]; std::cerr << "\ngenerating fastq from fasta input: " << fasta << std::endl; std::ifstream is(fasta.c_str()); std::string line, s1, s2; - // skip - while (std::getline(is, line) && line.empty()) - { + // skip + while (std::getline(is, line) && line.empty()) { } - if (line.empty() || ('>' != line[0])) - { - std::cerr << "failed to find reference sequence: first line is: " << line << std::endl; - exit (2); + if (line.empty() || ('>' != line[0])) { + std::cerr << "failed to find reference sequence: first line is: " << line + << std::endl; + exit(2); } std::cerr << "using reference sequence: " << line << std::endl; const auto ws = line.find(' '); std::cerr << "ws: " << ws << std::endl; - const std::string name = line.substr(1, (ws == std::string::npos ? ws : ws -1)); + const std::string name = + line.substr(1, (ws == std::string::npos ? ws : ws - 1)); std::cerr << "name: " << name << std::endl; std::ostringstream read; const char Q = '@' + 32; std::string qualities; size_t position = 0; - while(std::getline(is, line) && (!line.empty()) && ('>' != line[0])) - { + while (std::getline(is, line) && (!line.empty()) && ('>' != line[0])) { position += line.length(); - if (getline(is, s1) && (!s1.empty()) && ('>' != s1[0]) && - getline(is, s2) && (!s2.empty()) && ('>' != s2[0])) - { + if (getline(is, s1) && (!s1.empty()) && ('>' != s1[0]) && getline(is, s2) && + (!s2.empty()) && ('>' != s2[0])) { qualities.resize(s1.length() + s2.length(), Q); read.str(""); - read << name << ':' << std::setw(10) << std::setfill('0') << position << std::setfill(' ') << '\n' << s1 << s2 << "\n+\n" << qualities; + read << name << ':' << std::setw(10) << std::setfill('0') << position + << std::setfill(' ') << '\n' + << s1 << s2 << "\n+\n" + << qualities; position += s1.length() + s2.length(); - if ((std::string::npos != s1.find('N')) || (std::string::npos != s2.find('N'))) - { + if ((std::string::npos != s1.find('N')) || + (std::string::npos != s2.find('N'))) { continue; } std::cout << read.str() << std::endl; - //break; - } - else - { + // break; + } else { break; } }