Skip to content

Commit

Permalink
feat: restore gene model url (#177)
Browse files Browse the repository at this point in the history
  • Loading branch information
hunterckx committed Nov 17, 2024
1 parent 4f1f59f commit 94f19e4
Show file tree
Hide file tree
Showing 10 changed files with 86 additions and 30 deletions.
1 change: 1 addition & 0 deletions app/apis/catalog/brc-analytics-catalog/common/entities.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ export interface BRCDataCatalogGenome {
chromosomes: number | null;
coverage: string | null;
gcPercent: number;
geneModelUrl: string | null;
isRef: boolean;
length: number;
level: string;
Expand Down
2 changes: 1 addition & 1 deletion app/components/Entity/components/AnalysisMethod/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@ import { ANALYSIS_METHOD } from "../../../../apis/catalog/brc-analytics-catalog/
/**
 * Props accepted by the AnalysisMethod component.
 */
export interface Props {
  analysisMethod: ANALYSIS_METHOD;
  content: ReactNode;
  /** Gene model URL for the genome, or `null` when no gene model is available. */
  geneModelUrl: string | null;
  genomeVersionAssemblyId: string;
}
4 changes: 2 additions & 2 deletions app/utils/galaxy-api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ const WORKFLOW_LANDING_URL_PREFIX =
export async function getWorkflowLandingUrl(
workflowId: WORKFLOW_ID,
referenceGenome: string,
geneModelUrl: string
geneModelUrl: string | null
): Promise<string> {
const body: WorkflowLandingsBody = {
public: true,
Expand Down Expand Up @@ -74,7 +74,7 @@ function buildFastaUrl(identifier: string): string {
function getWorkflowLandingsRequestState(
workflowId: WORKFLOW_ID,
referenceGenome: string,
geneModelUrl: string
geneModelUrl: string | null
): WorkflowLandingsBodyRequestState {
if (workflowId === WORKFLOW_ID.VARIANT_CALLING && geneModelUrl) {
return {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,7 @@ export const buildGenomeAnalysisMethod = (
): ComponentProps<typeof C.AnalysisMethod> => {
return {
...analysisMethodProps,
geneModelUrl: "",
geneModelUrl: genome.geneModelUrl,
genomeVersionAssemblyId: genome.accession,
};
};
Expand Down
1 change: 1 addition & 0 deletions files/build-catalog.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ async function buildGenomes(): Promise<BRCDataCatalogGenome[]> {
chromosomes: parseNumberOrNull(row.chromosomeCount),
coverage: parseStringOrNull(row.coverage),
gcPercent: parseNumber(row.gcPercent),
geneModelUrl: parseStringOrNull(row.geneModelUrl),
isRef: parseBoolean(row.isRef),
length: parseNumber(row.length),
level: row.level,
Expand Down
39 changes: 36 additions & 3 deletions files/build-files-from-ncbi.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from urllib.parse import quote as url_quote
import pandas as pd
import requests
import urllib.parse
import re

TAXA_URL = "https://docs.google.com/spreadsheets/d/1Gg9sw2Qw765tOx2To53XkTAn-RAMiBtqYrfItlLXXrc/gviz/tq?tqx=out:csv&sheet=Sheet1.csv"

Expand Down Expand Up @@ -28,7 +29,7 @@ def get_tax_ids(organisms_df):
return list(organisms_df["taxonomyId"])

def build_genomes_url(tax_ids):
    """Build the NCBI Datasets v2 genome dataset-report URL for the given taxa.

    Args:
        tax_ids: Iterable of taxonomy IDs; each is converted with ``str``.

    Returns:
        The request URL, with the comma-joined, percent-encoded IDs as the
        taxon path segment and the fixed RefSeq/annotation/assembly-level
        filters as the query string.
    """
    # Quote outside the f-string so the literal does not rely on reusing the
    # same quote character inside a replacement field (Python 3.12+ only).
    taxon_path = urllib.parse.quote(",".join(str(tax_id) for tax_id in tax_ids))
    return f"https://api.ncbi.nlm.nih.gov/datasets/v2/genome/taxon/{taxon_path}/dataset_report?filters.assembly_source=refseq&filters.has_annotation=true&filters.exclude_paired_reports=true&filters.exclude_atypical=true&filters.assembly_level=scaffold&filters.assembly_level=chromosome&filters.assembly_level=complete_genome"

def get_genome_row(genome_info):
refseq_category = genome_info["assembly_info"].get("refseq_category")
Expand All @@ -52,6 +53,38 @@ def get_genome_row(genome_info):
def get_genomes_df(tax_ids):
    """Fetch genome dataset reports for the given taxa and tabulate them.

    Args:
        tax_ids: Taxonomy IDs passed to ``build_genomes_url``.

    Returns:
        A DataFrame with one row per entry of the response's ``"reports"``
        list, each row produced by ``get_genome_row``.

    Raises:
        requests.exceptions.RequestException: On connection failure, timeout,
            or an HTTP error status.
        KeyError: If the response JSON has no ``"reports"`` key.
    """
    # A timeout keeps the build from hanging forever on a stalled connection.
    response = requests.get(build_genomes_url(tax_ids), timeout=60)
    # Surface HTTP failures directly instead of as a JSON/KeyError below.
    response.raise_for_status()
    # NOTE(review): only this single response is read; if the API paginates,
    # later pages are not fetched — confirm the page size covers all genomes.
    reports = response.json()["reports"]
    return pd.DataFrame(data=[get_genome_row(genome_info) for genome_info in reports])

def _id_to_gene_model_url(asm_id, timeout=60):
    """Find the UCSC-hosted gene model (GTF) URL for an assembly accession.

    Requests the accession's ``genes`` directory under the UCSC hubs download
    site and scans the returned HTML for ``<asm_id>....gtf.gz`` file names.

    Args:
        asm_id: Assembly accession, e.g. ``GCF_030504385.1``.
        timeout: Seconds to wait for the listing request before giving up.

    Returns:
        The URL of the first listed file containing ``ncbiRefSeq``; otherwise
        the URL of the last listed file containing ``augustus``; otherwise
        ``None``. Also ``None`` when the listing responds with an HTTP error
        status.

    Raises:
        requests.exceptions.RequestException: On connection failure or
            timeout. These are deliberately not swallowed, so that a network
            problem is not recorded as "no gene model".
    """
    hubs_url = "https://hgdownload.soe.ucsc.edu/hubs/"
    # Path is <prefix>/<3 chars>/<3 chars>/<3 chars>/<accession>/genes; index 3
    # (the separator after the prefix) is skipped on purpose.
    components = [asm_id[0:3], asm_id[4:7], asm_id[7:10], asm_id[10:13], asm_id, "genes"]
    url = urllib.parse.urljoin(hubs_url, "/".join(components))
    # url looks something like https://hgdownload.soe.ucsc.edu/hubs/GCF/030/504/385/GCF_030504385.1/genes/
    # and contains html content with links to gene models.
    # we need to scrape this to get the gtf
    print(f"fetching url {url}")
    response = requests.get(url, timeout=timeout)
    try:
        response.raise_for_status()
    except requests.exceptions.HTTPError:
        # FIXME?: Some accessions don't have a gene folder
        return None
    # find link to gtf, should ideally be ncbiRefSeq, but augustus will do
    html_content = response.text
    # re.escape neutralises the "." in the accession (and any other regex
    # metacharacter) without needing a backslash inside the f-string field.
    pattern = rf"{re.escape(asm_id)}.*?\.gtf\.gz"
    augustus_file = None
    for match in re.findall(pattern, html_content):
        if "ncbiRefSeq" in match:
            return urllib.parse.urljoin(f"{url}/", match)
        elif "augustus" in match:
            augustus_file = match
    if augustus_file:
        return urllib.parse.urljoin(f"{url}/", augustus_file)
    # No match, I guess that's OK ?
    return None


def add_gene_model_url(genomes_df: pd.DataFrame):
    """Return ``genomes_df`` with a ``geneModelUrl`` column appended.

    The column holds the result of ``_id_to_gene_model_url`` applied to each
    value of the ``accession`` column. The input frame is not modified; a new
    frame is built by column-wise concatenation.
    """
    gene_model_urls = genomes_df["accession"].apply(_id_to_gene_model_url)
    gene_model_column = gene_model_urls.rename("geneModelUrl")
    return pd.concat([genomes_df, gene_model_column], axis="columns")

def build_files():
print("Building files")

Expand All @@ -71,7 +104,7 @@ def build_files():
gen_bank_merge_df = genomes_source_df.merge(assemblies_df, how="left", left_on="pairedAccession", right_on="genBank")
ref_seq_merge_df = genomes_source_df.merge(assemblies_df, how="left", left_on="accession", right_on="refSeq")

genomes_df = gen_bank_merge_df.combine_first(ref_seq_merge_df)
genomes_df = add_gene_model_url(gen_bank_merge_df.combine_first(ref_seq_merge_df))

genomes_df.to_csv(GENOMES_OUTPUT_PATH, index=False, sep="\t")

Expand Down
1 change: 1 addition & 0 deletions files/entities.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ export interface SourceGenome {
chromosomeCount: string;
coverage: string;
gcPercent: string;
geneModelUrl: string;
isRef: string;
length: string;
level: string;
Expand Down
20 changes: 20 additions & 0 deletions files/out/genomes.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
"chromosomes": 14,
"coverage": null,
"gcPercent": 42.5,
"geneModelUrl": "https://hgdownload.soe.ucsc.edu/hubs/GCF/000/002/415/GCF_000002415.2/genes/GCF_000002415.2_ASM241v2.ncbiRefSeq.gtf.gz",
"isRef": true,
"length": 27007701,
"level": "Chromosome",
Expand All @@ -21,6 +22,7 @@
"chromosomes": 11,
"coverage": null,
"gcPercent": 46.5,
"geneModelUrl": "https://hgdownload.soe.ucsc.edu/hubs/GCF/000/002/445/GCF_000002445.2/genes/GCF_000002445.2_ASM244v1.ncbiRefSeq.gtf.gz",
"isRef": true,
"length": 26075494,
"level": "Chromosome",
Expand All @@ -37,6 +39,7 @@
"chromosomes": 36,
"coverage": null,
"gcPercent": 59.5,
"geneModelUrl": "https://hgdownload.soe.ucsc.edu/hubs/GCF/000/002/725/GCF_000002725.2/genes/GCF_000002725.2_ASM272v2.ncbiRefSeq.gtf.gz",
"isRef": true,
"length": 32855089,
"level": "Complete Genome",
Expand All @@ -53,6 +56,7 @@
"chromosomes": 14,
"coverage": "100.0x",
"gcPercent": 19.5,
"geneModelUrl": "https://hgdownload.soe.ucsc.edu/hubs/GCF/000/002/765/GCF_000002765.6/genes/GCF_000002765.6_GCA_000002765.ncbiRefSeq.gtf.gz",
"isRef": true,
"length": 23292622,
"level": "Complete Genome",
Expand All @@ -69,6 +73,7 @@
"chromosomes": 35,
"coverage": null,
"gcPercent": 58,
"geneModelUrl": "https://hgdownload.soe.ucsc.edu/hubs/GCF/000/002/845/GCF_000002845.2/genes/GCF_000002845.2_ASM284v2.ncbiRefSeq.gtf.gz",
"isRef": true,
"length": 32068771,
"level": "Chromosome",
Expand All @@ -85,6 +90,7 @@
"chromosomes": 14,
"coverage": "26.5x",
"gcPercent": 52.5,
"geneModelUrl": "https://hgdownload.soe.ucsc.edu/hubs/GCF/000/006/565/GCF_000006565.2/genes/GCF_000006565.2_TGA4.ncbiRefSeq.gtf.gz",
"isRef": true,
"length": 65633124,
"level": "Chromosome",
Expand All @@ -101,6 +107,7 @@
"chromosomes": null,
"coverage": null,
"gcPercent": 46,
"geneModelUrl": "https://hgdownload.soe.ucsc.edu/hubs/GCF/000/149/335/GCF_000149335.2/genes/GCF_000149335.2_ASM14933v2.ncbiRefSeq.gtf.gz",
"isRef": true,
"length": 28947925,
"level": "Scaffold",
Expand All @@ -117,6 +124,7 @@
"chromosomes": 1,
"coverage": null,
"gcPercent": 65.5,
"geneModelUrl": null,
"isRef": true,
"length": 4411532,
"level": "Complete Genome",
Expand All @@ -133,6 +141,7 @@
"chromosomes": null,
"coverage": null,
"gcPercent": 51.5,
"geneModelUrl": "https://hgdownload.soe.ucsc.edu/hubs/GCF/000/209/065/GCF_000209065.1/genes/GCF_000209065.1_ASM20906v1.ncbiRefSeq.gtf.gz",
"isRef": true,
"length": 89937456,
"level": "Scaffold",
Expand All @@ -149,6 +158,7 @@
"chromosomes": 36,
"coverage": null,
"gcPercent": 59.5,
"geneModelUrl": "https://hgdownload.soe.ucsc.edu/hubs/GCF/000/227/135/GCF_000227135.1/genes/GCF_000227135.1_ASM22713v2.ncbiRefSeq.gtf.gz",
"isRef": true,
"length": 32444968,
"level": "Chromosome",
Expand All @@ -165,6 +175,7 @@
"chromosomes": 1,
"coverage": null,
"gcPercent": 65.5,
"geneModelUrl": null,
"isRef": false,
"length": 4411709,
"level": "Complete Genome",
Expand All @@ -181,6 +192,7 @@
"chromosomes": 1,
"coverage": null,
"gcPercent": 33,
"geneModelUrl": null,
"isRef": false,
"length": 196858,
"level": "Complete Genome",
Expand All @@ -197,6 +209,7 @@
"chromosomes": 1,
"coverage": null,
"gcPercent": 38,
"geneModelUrl": null,
"isRef": false,
"length": 29903,
"level": "Complete Genome",
Expand All @@ -213,6 +226,7 @@
"chromosomes": 3,
"coverage": "250.0x",
"gcPercent": 37,
"geneModelUrl": "https://hgdownload.soe.ucsc.edu/hubs/GCF/016/801/865/GCF_016801865.2/genes/GCF_016801865.2_TS_CPP_V2.ncbiRefSeq.gtf.gz",
"isRef": true,
"length": 566339288,
"level": "Chromosome",
Expand All @@ -229,6 +243,7 @@
"chromosomes": 9,
"coverage": "475.0x",
"gcPercent": 46.5,
"geneModelUrl": "https://hgdownload.soe.ucsc.edu/hubs/GCF/018/416/015/GCF_018416015.2/genes/GCF_018416015.2_ASM1841601v2.ncbiRefSeq.gtf.gz",
"isRef": true,
"length": 28193268,
"level": "Complete Genome",
Expand All @@ -245,6 +260,7 @@
"chromosomes": 1,
"coverage": "20.0x",
"gcPercent": 65.5,
"geneModelUrl": null,
"isRef": false,
"length": 4516435,
"level": "Complete Genome",
Expand All @@ -261,6 +277,7 @@
"chromosomes": 14,
"coverage": "100.0x",
"gcPercent": 21.5,
"geneModelUrl": "https://hgdownload.soe.ucsc.edu/hubs/GCF/900/002/385/GCF_900002385.2/genes/GCF_900002385.2_GCA_900002385.ncbiRefSeq.gtf.gz",
"isRef": true,
"length": 23043114,
"level": "Complete Genome",
Expand All @@ -277,6 +294,7 @@
"chromosomes": 14,
"coverage": "155.0x",
"gcPercent": 23,
"geneModelUrl": "https://hgdownload.soe.ucsc.edu/hubs/GCF/900/681/995/GCF_900681995.1/genes/GCF_900681995.1_PVVCY_v1.ncbiRefSeq.gtf.gz",
"isRef": true,
"length": 18338688,
"level": "Complete Genome",
Expand All @@ -293,6 +311,7 @@
"chromosomes": 3,
"coverage": "54.0x",
"gcPercent": 44.5,
"geneModelUrl": "https://hgdownload.soe.ucsc.edu/hubs/GCF/943/734/735/GCF_943734735.2/genes/GCF_943734735.2_idAnoGambNW_F1_1.ncbiRefSeq.gtf.gz",
"isRef": true,
"length": 264451381,
"level": "Chromosome",
Expand All @@ -309,6 +328,7 @@
"chromosomes": 1,
"coverage": "100.0x",
"gcPercent": 65.5,
"geneModelUrl": null,
"isRef": false,
"length": 4469156,
"level": "Complete Genome",
Expand Down
42 changes: 21 additions & 21 deletions files/source/genomes-from-ncbi.tsv
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
taxon taxonomyId accession isRef level chromosomeCount length scaffoldCount scaffoldN50 scaffoldL50 coverage gcPercent annotationStatus pairedAccession ucscBrowser genBank refSeq
Mycobacterium tuberculosis H37Rv 83332 GCF_000195955.2 True Complete Genome 1.0 4411532 1 4411532 1 65.5 GCA_000195955.2
Plasmodium falciparum 3D7 36329 GCF_000002765.6 True Complete Genome 14.0 23292622 14 1687656 5 100.0x 19.5 Full annotation GCA_000002765.3 https://genome.ucsc.edu/h/GCF_000002765.5 GCA_000002765.3 GCF_000002765.5
Leishmania major strain Friedlin 347515 GCF_000002725.2 True Complete Genome 36.0 32855089 36 1091540 11 59.5 Full annotation GCA_000002725.2 https://genome.ucsc.edu/h/GCF_000002725.2 GCA_000002725.2 GCF_000002725.2
Plasmodium yoelii 5861 GCF_900002385.2 True Complete Genome 14.0 23043114 14 2046250 5 100.0x 21.5 Full annotation GCA_900002385.2 https://genome.ucsc.edu/h/GCF_900002385.2 GCA_900002385.2 GCF_900002385.2
Coccidioides posadasii str. Silveira 443226 GCF_018416015.2 True Complete Genome 9.0 28193268 9 8079863 2 475.0x 46.5 Full annotation GCA_018416015.2 https://genome.ucsc.edu/h/GCA_018416015.2 GCA_018416015.2 GCF_018416015.1
Plasmodium vinckei vinckei 54757 GCF_900681995.1 True Complete Genome 14.0 18338688 14 1692345 5 155.0x 23.0 Full annotation GCA_900681995.1 https://genome.ucsc.edu/h/GCF_900681995.1 GCA_900681995.1 GCF_900681995.1
Leishmania donovani 5661 GCF_000227135.1 True Chromosome 36.0 32444968 36 1024085 11 59.5 Full annotation GCA_000227135.2 https://genome.ucsc.edu/h/GCF_000227135.1 GCA_000227135.2 GCF_000227135.1
Toxoplasma gondii ME49 508771 GCF_000006565.2 True Chromosome 14.0 65633124 2276 4973582 6 26.5x 52.5 GCA_000006565.2 https://genome.ucsc.edu/h/GCF_000006565.2 GCA_000006565.2 GCF_000006565.2
Trypanosoma brucei brucei TREU927 185431 GCF_000002445.2 True Chromosome 11.0 26075494 12 2481190 4 46.5 Full annotation GCA_000002445.1 https://genome.ucsc.edu/h/GCF_000002445.2 GCA_000002445.1 GCF_000002445.2
Anopheles gambiae 7165 GCF_943734735.2 True Chromosome 3.0 264451381 190 99149756 2 54.0x 44.5 Full annotation GCA_943734735.2 https://genome.ucsc.edu/h/GCF_943734735.2 GCA_943734735.2 GCF_943734735.2
Plasmodium vivax 5855 GCF_000002415.2 True Chromosome 14.0 27007701 2747 1678596 6 42.5 Full annotation GCA_000002415.2 https://genome.ucsc.edu/h/GCF_000002415.2 GCA_000002415.2 GCF_000002415.2
Culex pipiens pallens 42434 GCF_016801865.2 True Chromosome 3.0 566339288 289 186194774 2 250.0x 37.0 Full annotation GCA_016801865.2
Leishmania braziliensis MHOM/BR/75/M2904 420245 GCF_000002845.2 True Chromosome 35.0 32068771 138 992961 11 58.0 GCA_000002845.2 https://genome.ucsc.edu/h/GCF_000002845.2 GCA_000002845.2 GCF_000002845.2
Trypanosoma cruzi 5693 GCF_000209065.1 True Scaffold 89937456 29495 88624 212 51.5 Full annotation GCA_000209065.1 https://genome.ucsc.edu/h/GCF_000209065.1 GCA_000209065.1 GCF_000209065.1
Coccidioides immitis RS 246410 GCF_000149335.2 True Scaffold 28947925 6 4323945 3 46.0 Full annotation GCA_000149335.2 https://genome.ucsc.edu/h/GCF_000149335.2 GCA_000149335.2 GCF_000149335.2
Mycobacterium tuberculosis H37Rv 83332 GCF_000277735.2 False Complete Genome 1.0 4411709 1 4411709 1 65.5 GCA_000277735.2
Severe acute respiratory syndrome coronavirus 2 2697049 GCF_009858895.2 False Complete Genome 1.0 29903 1 29903 1 38.0 GCA_009858895.3
Monkeypox virus 10244 GCF_000857045.1 False Complete Genome 1.0 196858 1 196858 1 33.0 GCA_000857045.1
Mycobacterium tuberculosis 1773 GCF_030566675.1 False Complete Genome 1.0 4516435 1 4516435 1 20.0x 65.5 GCA_030566675.1
Mycobacterium tuberculosis 1773 GCF_963525475.1 False Complete Genome 1.0 4469156 1 4469156 1 100.0x 65.5 GCA_963525475.1
taxon taxonomyId accession isRef level chromosomeCount length scaffoldCount scaffoldN50 scaffoldL50 coverage gcPercent annotationStatus pairedAccession ucscBrowser genBank refSeq geneModelUrl
Mycobacterium tuberculosis H37Rv 83332 GCF_000195955.2 True Complete Genome 1.0 4411532 1 4411532 1 65.5 GCA_000195955.2
Plasmodium falciparum 3D7 36329 GCF_000002765.6 True Complete Genome 14.0 23292622 14 1687656 5 100.0x 19.5 Full annotation GCA_000002765.3 https://genome.ucsc.edu/h/GCF_000002765.5 GCA_000002765.3 GCF_000002765.5 https://hgdownload.soe.ucsc.edu/hubs/GCF/000/002/765/GCF_000002765.6/genes/GCF_000002765.6_GCA_000002765.ncbiRefSeq.gtf.gz
Leishmania major strain Friedlin 347515 GCF_000002725.2 True Complete Genome 36.0 32855089 36 1091540 11 59.5 Full annotation GCA_000002725.2 https://genome.ucsc.edu/h/GCF_000002725.2 GCA_000002725.2 GCF_000002725.2 https://hgdownload.soe.ucsc.edu/hubs/GCF/000/002/725/GCF_000002725.2/genes/GCF_000002725.2_ASM272v2.ncbiRefSeq.gtf.gz
Plasmodium yoelii 5861 GCF_900002385.2 True Complete Genome 14.0 23043114 14 2046250 5 100.0x 21.5 Full annotation GCA_900002385.2 https://genome.ucsc.edu/h/GCF_900002385.2 GCA_900002385.2 GCF_900002385.2 https://hgdownload.soe.ucsc.edu/hubs/GCF/900/002/385/GCF_900002385.2/genes/GCF_900002385.2_GCA_900002385.ncbiRefSeq.gtf.gz
Coccidioides posadasii str. Silveira 443226 GCF_018416015.2 True Complete Genome 9.0 28193268 9 8079863 2 475.0x 46.5 Full annotation GCA_018416015.2 https://genome.ucsc.edu/h/GCA_018416015.2 GCA_018416015.2 GCF_018416015.1 https://hgdownload.soe.ucsc.edu/hubs/GCF/018/416/015/GCF_018416015.2/genes/GCF_018416015.2_ASM1841601v2.ncbiRefSeq.gtf.gz
Plasmodium vinckei vinckei 54757 GCF_900681995.1 True Complete Genome 14.0 18338688 14 1692345 5 155.0x 23.0 Full annotation GCA_900681995.1 https://genome.ucsc.edu/h/GCF_900681995.1 GCA_900681995.1 GCF_900681995.1 https://hgdownload.soe.ucsc.edu/hubs/GCF/900/681/995/GCF_900681995.1/genes/GCF_900681995.1_PVVCY_v1.ncbiRefSeq.gtf.gz
Leishmania donovani 5661 GCF_000227135.1 True Chromosome 36.0 32444968 36 1024085 11 59.5 Full annotation GCA_000227135.2 https://genome.ucsc.edu/h/GCF_000227135.1 GCA_000227135.2 GCF_000227135.1 https://hgdownload.soe.ucsc.edu/hubs/GCF/000/227/135/GCF_000227135.1/genes/GCF_000227135.1_ASM22713v2.ncbiRefSeq.gtf.gz
Toxoplasma gondii ME49 508771 GCF_000006565.2 True Chromosome 14.0 65633124 2276 4973582 6 26.5x 52.5 GCA_000006565.2 https://genome.ucsc.edu/h/GCF_000006565.2 GCA_000006565.2 GCF_000006565.2 https://hgdownload.soe.ucsc.edu/hubs/GCF/000/006/565/GCF_000006565.2/genes/GCF_000006565.2_TGA4.ncbiRefSeq.gtf.gz
Trypanosoma brucei brucei TREU927 185431 GCF_000002445.2 True Chromosome 11.0 26075494 12 2481190 4 46.5 Full annotation GCA_000002445.1 https://genome.ucsc.edu/h/GCF_000002445.2 GCA_000002445.1 GCF_000002445.2 https://hgdownload.soe.ucsc.edu/hubs/GCF/000/002/445/GCF_000002445.2/genes/GCF_000002445.2_ASM244v1.ncbiRefSeq.gtf.gz
Anopheles gambiae 7165 GCF_943734735.2 True Chromosome 3.0 264451381 190 99149756 2 54.0x 44.5 Full annotation GCA_943734735.2 https://genome.ucsc.edu/h/GCF_943734735.2 GCA_943734735.2 GCF_943734735.2 https://hgdownload.soe.ucsc.edu/hubs/GCF/943/734/735/GCF_943734735.2/genes/GCF_943734735.2_idAnoGambNW_F1_1.ncbiRefSeq.gtf.gz
Plasmodium vivax 5855 GCF_000002415.2 True Chromosome 14.0 27007701 2747 1678596 6 42.5 Full annotation GCA_000002415.2 https://genome.ucsc.edu/h/GCF_000002415.2 GCA_000002415.2 GCF_000002415.2 https://hgdownload.soe.ucsc.edu/hubs/GCF/000/002/415/GCF_000002415.2/genes/GCF_000002415.2_ASM241v2.ncbiRefSeq.gtf.gz
Culex pipiens pallens 42434 GCF_016801865.2 True Chromosome 3.0 566339288 289 186194774 2 250.0x 37.0 Full annotation GCA_016801865.2 https://hgdownload.soe.ucsc.edu/hubs/GCF/016/801/865/GCF_016801865.2/genes/GCF_016801865.2_TS_CPP_V2.ncbiRefSeq.gtf.gz
Leishmania braziliensis MHOM/BR/75/M2904 420245 GCF_000002845.2 True Chromosome 35.0 32068771 138 992961 11 58.0 GCA_000002845.2 https://genome.ucsc.edu/h/GCF_000002845.2 GCA_000002845.2 GCF_000002845.2 https://hgdownload.soe.ucsc.edu/hubs/GCF/000/002/845/GCF_000002845.2/genes/GCF_000002845.2_ASM284v2.ncbiRefSeq.gtf.gz
Trypanosoma cruzi 5693 GCF_000209065.1 True Scaffold 89937456 29495 88624 212 51.5 Full annotation GCA_000209065.1 https://genome.ucsc.edu/h/GCF_000209065.1 GCA_000209065.1 GCF_000209065.1 https://hgdownload.soe.ucsc.edu/hubs/GCF/000/209/065/GCF_000209065.1/genes/GCF_000209065.1_ASM20906v1.ncbiRefSeq.gtf.gz
Coccidioides immitis RS 246410 GCF_000149335.2 True Scaffold 28947925 6 4323945 3 46.0 Full annotation GCA_000149335.2 https://genome.ucsc.edu/h/GCF_000149335.2 GCA_000149335.2 GCF_000149335.2 https://hgdownload.soe.ucsc.edu/hubs/GCF/000/149/335/GCF_000149335.2/genes/GCF_000149335.2_ASM14933v2.ncbiRefSeq.gtf.gz
Mycobacterium tuberculosis H37Rv 83332 GCF_000277735.2 False Complete Genome 1.0 4411709 1 4411709 1 65.5 GCA_000277735.2
Severe acute respiratory syndrome coronavirus 2 2697049 GCF_009858895.2 False Complete Genome 1.0 29903 1 29903 1 38.0 GCA_009858895.3
Monkeypox virus 10244 GCF_000857045.1 False Complete Genome 1.0 196858 1 196858 1 33.0 GCA_000857045.1
Mycobacterium tuberculosis 1773 GCF_030566675.1 False Complete Genome 1.0 4516435 1 4516435 1 20.0x 65.5 GCA_030566675.1
Mycobacterium tuberculosis 1773 GCF_963525475.1 False Complete Genome 1.0 4469156 1 4469156 1 100.0x 65.5 GCA_963525475.1
Loading

0 comments on commit 94f19e4

Please sign in to comment.