From 74f98551ffbf66ae6ccfd486ee60891c73728d75 Mon Sep 17 00:00:00 2001 From: Wei Shen Date: Thu, 15 Aug 2024 18:48:25 +0100 Subject: [PATCH] update benchmark results --- README.md | 12 ++++---- docs/content/_index.md | 12 ++++---- docs/content/introduction/_index.md | 46 +++++++++++++++------------- docs/content/performance@genbank.tsv | 8 ++--- 4 files changed, 40 insertions(+), 38 deletions(-) diff --git a/README.md b/README.md index 25d2593..df90595 100644 --- a/README.md +++ b/README.md @@ -43,12 +43,12 @@ Running at this scale has previously only been achieved by [Phylign](https://git **With LexicMap** (48 CPUs), - |Query |Genome hits|Time |RAM | - |:-------------------|----------:|-----:|------:| - |A 1.3-kb marker gene|37,164 |52s |4.1 GB | - |A 1.5-kb 16S rRNA |1,949,496 |13m53s|13.1 GB| - |A 52.8-kb plasmid |544,619 |23m30s|17.5 GB| - |1003 AMR genes |25,702,419 |4h02m |41.3 GB| + |Query |Genome hits|Time |RAM | + |:-------------------|----------:|---------:|------:| + |A 1.3-kb marker gene|37,164 |36 s |4.1 GB | + |A 1.5-kb 16S rRNA |1,949,496 |10 m 41 s |14.1 GB| + |A 52.8-kb plasmid |544,619 |19 m 20 s |19.3 GB| + |1003 AMR genes |25,702,419 |187 m 40 s|55.4 GB| More documents: https://bioinf.shenwei.me/LexicMap. diff --git a/docs/content/_index.md b/docs/content/_index.md index 28d3f40..ae9aee6 100644 --- a/docs/content/_index.md +++ b/docs/content/_index.md @@ -66,12 +66,12 @@ Step 2: searching Using LexicMap to search in the whole **2,340,672** Genbank+Refseq prokaryotic genomes with 48 CPUs. 
-|Query |Genome hits|Time |RAM | -|:-------------------|----------:|-----:|------:| -|A 1.3-kb marker gene|37,164 |52s |4.1 GB | -|A 1.5-kb 16S rRNA |1,949,496 |13m53s|13.1 GB| -|A 52.8-kb plasmid |544,619 |23m30s|17.5 GB| -|1003 AMR genes |25,702,419 |4h02m |41.3 GB| +|Query |Genome hits|Time |RAM | +|:-------------------|----------:|------:|-----:| +|A 1.3-kb marker gene|37,164 |36s |4.1GB | +|A 1.5-kb 16S rRNA |1,949,496 |10m41s |14.1GB| +|A 52.8-kb plasmid |544,619 |19m20s |19.3GB| +|1003 AMR genes |25,702,419 |187m40s|55.4GB| ***Blastn** is unable to run with the same dataset on common servers as it requires >2000 GB RAM*. diff --git a/docs/content/introduction/_index.md b/docs/content/introduction/_index.md index f9488cd..92dbddb 100644 --- a/docs/content/introduction/_index.md +++ b/docs/content/introduction/_index.md @@ -51,12 +51,12 @@ Running at this scale has previously only been achieved by [Phylign](https://git **With LexicMap** (48 CPUs), - |Query |Genome hits|Time |RAM | - |:-------------------|----------:|-----:|------:| - |A 1.3-kb marker gene|37,164 |52s |4.1 GB | - |A 1.5-kb 16S rRNA |1,949,496 |13m53s|13.1 GB| - |A 52.8-kb plasmid |544,619 |23m30s|17.5 GB| - |1003 AMR genes |25,702,419 |4h02m |41.3 GB| + |Query |Genome hits|Time |RAM | + |:-------------------|----------:|---------:|------:| + |A 1.3-kb marker gene|37,164 |36 s |4.1 GB | + |A 1.5-kb 16S rRNA |1,949,496 |10 m 41 s |14.1 GB| + |A 52.8-kb plasmid |544,619 |19 m 20 s |19.3 GB| + |1003 AMR genes |25,702,419 |187 m 40 s|55.4 GB| ## Quick start @@ -198,46 +198,48 @@ Phylign only has the index for AllTheBacteria HQ dataset. 
GTDB complete (402,538 genomes): - |query |query_len |tool |genome_hits|genome_hits(qcov>50)|time |RAM | -|:--------------|:------------|:--------------|----------:|-------------------:|---------:|-------:| -|a marker gene |1,299 bp |LexicMap |5,170 |5,143 |3.0 s |1.4 GB | +|:--------------|------------:|:--------------|----------:|-------------------:|---------:|-------:| +|a marker gene |1,299 bp |LexicMap |5,170 |5,143 |17 s |1.4 GB | | | |Blastn |7,121 |6,177 |2,171 s |351.2 GB| -|a 16S rRNA gene|1,542 bp |LexicMap |303,925 |278,141 |92 s |4.9 GB | +|a 16S rRNA gene|1,542 bp |LexicMap |303,925 |278,141 |235 s |4.4 GB | | | |Blastn |301,197 |277,042 |2,353 s |378.4 GB| -|a plasmid |52,830 bp |LexicMap |63,108 |1,190 |87 s |4.8 GB | +|a plasmid |52,830 bp |LexicMap |63,108 |1,190 |499 s |4.6 GB | | | |Blastn |69,311 |2,308 |2,262 s |364.7 GB| -|1033 AMR genes |1 kb (median)|LexicMap |3,867,003 |2,228,339 |1,254 s |21.4 GB | +|1033 AMR genes |1 kb (median)|LexicMap |3,867,003 |2,228,339 |4,350 s |16.3 GB | | | |Blastn |5,357,772 |2,240,766 |4,686 s |442.1 GB| + AllTheBacteria HQ (1,858,610 genomes): |query |query_len |tool |genome_hits|genome_hits(qcov>50)|time |RAM | -|:--------------|:------------|:--------------|----------:|-------------------:|---------:|-------:| -|a marker gene |1,299 bp |LexicMap |27,963 |27,953 |41.7 s |3.4 GB | +|:--------------|------------:|:--------------|----------:|-------------------:|---------:|-------:| +|a marker gene |1,299 bp |LexicMap |27,963 |27,953 |31 s |3.4 GB | | | |Phylign_local |7,936 | |30 m 48 s |77.6 GB | | | |Phylign_cluster|7,936 | |28 m 33 s | | -|a 16S rRNA gene|1,542 bp |LexicMap |1,857,761 |1,740,000 |13 m 24 s |13.7 GB | +|a 16S rRNA gene|1,542 bp |LexicMap |1,857,761 |1,740,000 |9 m 36 s |14.9 GB | | | |Phylign_local |1,017,765 | |130 m 33 s|77.0 GB | | | |Phylign_cluster|1,017,765 | |86 m 41 s | | -|a plasmid |52,830 bp |LexicMap |468,821 |3,618 |20 m 48 s |15.9 GB | +|a plasmid |52,830 bp |LexicMap |468,821 
|3,618 |15 m 55 s |15.7 GB | | | |Phylign_local |46,822 | |47 m 33 s |82.6 GB | | | |Phylign_cluster|46,822 | |39 m 34 s | | -|1033 AMR genes |1 kb (median)|LexicMap |21,288,000 |12,148,642 |168 m 48 s|49.2 GB | +|1033 AMR genes |1 kb (median)|LexicMap |21,288,000 |12,148,642 |138 m 55 s|49.9 GB | | | |Phylign_local |1,135,215 | |156 m 08 s|85.9 GB | | | |Phylign_cluster|1,135,215 | |133 m 49 s| | + Genbank+RefSeq (2,340,672 genomes): |query |query_len |tool |genome_hits|genome_hits(qcov>50)|time |RAM | -|:--------------|:------------|:--------------|----------:|-------------------:|---------:|-------:| -|a marker gene |1,299 bp |LexicMap |37,164 |37,082 |51.9 s |4.1 GB | -|a 16S rRNA gene|1,542 bp |LexicMap |1,949,496 |1,381,974 |13 m 53 s |13.1 GB | -|a plasmid |52,830 bp |LexicMap |544,619 |6,563 |23 m 30 s |17.5 GB | -|1033 AMR genes |1 kb (median)|LexicMap |25,702,419 |14,692,624 |242 m 25 s|56.2 GB | +|:--------------|------------:|:--------------|----------:|-------------------:|---------:|-------:| +|a marker gene |1,299 bp |LexicMap |37,164 |37,082 |36 s |4.1 GB | +|a 16S rRNA gene|1,542 bp |LexicMap |1,949,496 |1,381,974 |10 m 41 s |14.1 GB | +|a plasmid |52,830 bp |LexicMap |544,619 |6,563 |19 m 20 s |19.3 GB | +|1033 AMR genes |1 kb (median)|LexicMap |25,702,419 |14,692,624 |187 m 40 s|55.4 GB | + Notes: - All files are stored on a server with HDD disks. No files are cached in memory. 
diff --git a/docs/content/performance@genbank.tsv b/docs/content/performance@genbank.tsv index b6013c6..6e3a529 100644 --- a/docs/content/performance@genbank.tsv +++ b/docs/content/performance@genbank.tsv @@ -1,5 +1,5 @@ Query Genome hits Time RAM -A 1.3-kb marker gene 37,164 52s 4.1 GB -A 1.5-kb 16S rRNA 1,949,496 13m53s 13.1 GB -A 52.8-kb plasmid 544,619 23m30s 17.5 GB -1003 AMR genes 25,702,419 4h02m 41.3 GB +A 1.3-kb marker gene 37,164 36 s 4.1 GB +A 1.5-kb 16S rRNA 1,949,496 10 m 41 s 14.1 GB +A 52.8-kb plasmid 544,619 19 m 20 s 19.3 GB +1003 AMR genes 25,702,419 187 m 40 s 55.4 GB