From f00faebeb9d1173ae78c9d13c07b34b0359527f4 Mon Sep 17 00:00:00 2001 From: dariober Date: Tue, 26 Nov 2024 16:51:07 +0000 Subject: [PATCH] Start adding test and possibly some fixes for #481 Conversion to GFF3Feature includes source and score and sets ID and Parent attributes. However, export to GFF from UI fails. --- packages/apollo-cli/README.md | 2 +- .../src/export/export.service.ts | 4 +- .../src/GFF3/annotationFeatureToGFF3.test.ts | 34 ++++++ .../src/GFF3/annotationFeatureToGFF3.ts | 112 ++++++++++++++++++ .../src/GFF3/gff3ToAnnotationFeature.test.ts | 4 +- packages/apollo-shared/src/GFF3/index.ts | 1 + packages/apollo-shared/src/util.ts | 102 ---------------- packages/apollo-shared/test_data/gene.json | 69 +++++++++++ .../src/BackendDrivers/DesktopFileDriver.ts | 4 +- .../src/components/DownloadGFF3.tsx | 4 +- 10 files changed, 226 insertions(+), 110 deletions(-) create mode 100644 packages/apollo-shared/src/GFF3/annotationFeatureToGFF3.test.ts create mode 100644 packages/apollo-shared/src/GFF3/annotationFeatureToGFF3.ts create mode 100644 packages/apollo-shared/test_data/gene.json diff --git a/packages/apollo-cli/README.md b/packages/apollo-cli/README.md index 7e1b98051..f0d70be83 100644 --- a/packages/apollo-cli/README.md +++ b/packages/apollo-cli/README.md @@ -16,7 +16,7 @@ $ npm install -g @apollo-annotation/cli $ apollo COMMAND running command... $ apollo (--version) -@apollo-annotation/cli/0.1.21 linux-x64 node-v20.17.0 +@apollo-annotation/cli/0.1.21 linux-x64 node-v20.13.0 $ apollo --help [COMMAND] USAGE $ apollo COMMAND diff --git a/packages/apollo-collaboration-server/src/export/export.service.ts b/packages/apollo-collaboration-server/src/export/export.service.ts index eb8072c88..bff83ef45 100644 --- a/packages/apollo-collaboration-server/src/export/export.service.ts +++ b/packages/apollo-collaboration-server/src/export/export.service.ts @@ -25,7 +25,7 @@ import { RefSeqDocument, } from '@apollo-annotation/schemas' import { - makeGFF3Feature, + annotationFeatureToGFF3, splitStringIntoChunks, } from '@apollo-annotation/shared' import gff from '@gmod/gff' @@ -179,7 +179,7 @@ export class ExportService { const refSeqNames = Object.fromEntries( refSeqs.map((refSeq) => [refSeq._id, refSeq.name]), ) - const gff3Feature = makeGFF3Feature( + const gff3Feature = annotationFeatureToGFF3( flattened as unknown as AnnotationFeatureSnapshot, undefined, refSeqNames, diff --git a/packages/apollo-shared/src/GFF3/annotationFeatureToGFF3.test.ts b/packages/apollo-shared/src/GFF3/annotationFeatureToGFF3.test.ts new file mode 100644 index 000000000..50769a82a --- /dev/null +++ b/packages/apollo-shared/src/GFF3/annotationFeatureToGFF3.test.ts @@ -0,0 +1,34 @@ +/* eslint-disable @typescript-eslint/no-floating-promises */ + +import { describe, it } from 'node:test' +import { assert } from 'chai' +import { readAnnotationFeatureSnapshot } from './gff3ToAnnotationFeature.test' +import { annotationFeatureToGFF3 } from './annotationFeatureToGFF3' + +describe('annotationFeatureToGFF3', () => { + it('Convert one gene', () => { + const annotationFeature = readAnnotationFeatureSnapshot( + 'test_data/gene.json', + ) + const [gff3Feature] = annotationFeatureToGFF3(annotationFeature) + + assert.deepEqual(gff3Feature.type, 'gene') + assert.deepEqual(gff3Feature.start, 1000) + assert.deepEqual(gff3Feature.end, 9000) + assert.deepEqual(gff3Feature.strand, '+') + assert.deepEqual(gff3Feature.score, 123) + assert.deepEqual(gff3Feature.source, 'test_data') + assert.deepEqual(gff3Feature.attributes?.Name, ['EDEN']) + assert.deepEqual(gff3Feature.attributes?.testid, ['t003']) + assert.deepEqual(gff3Feature.attributes?.ID, ['gene10001']) + + const [children] = gff3Feature.child_features + const [mrna] = children + assert.deepEqual(mrna.type, 'mRNA') + assert.deepEqual(mrna.attributes?.Parent, ['gene10001']) + + // Sanity check the annotationFeature does have a score, etc. + // assert.deepEqual(annotationFeature.attributes?.gff_score, ['123']) + // assert.deepEqual(annotationFeature.attributes?.gff_source, ['test_data']) + }) +}) diff --git a/packages/apollo-shared/src/GFF3/annotationFeatureToGFF3.ts b/packages/apollo-shared/src/GFF3/annotationFeatureToGFF3.ts new file mode 100644 index 000000000..9caad6488 --- /dev/null +++ b/packages/apollo-shared/src/GFF3/annotationFeatureToGFF3.ts @@ -0,0 +1,112 @@ +/* eslint-disable @typescript-eslint/no-unsafe-assignment */ + +import { AnnotationFeatureSnapshot } from '@apollo-annotation/mst' +import { GFF3Feature } from '@gmod/gff' + +export function annotationFeatureToGFF3( + feature: AnnotationFeatureSnapshot, + parentId?: string, + refSeqNames?: Record, +): GFF3Feature { + const locations = [{ start: feature.min, end: feature.max }] + // const locations = feature.discontinuousLocations?.length + // ? feature.discontinuousLocations + // : [{ start: feature.start, end: feature.end, phase: feature.phase }] + const attributes: Record = JSON.parse( + JSON.stringify(feature.attributes), + ) + const ontologyTerms: string[] = [] + const source = feature.attributes?.gff_source?.[0] ?? null + delete attributes.gff_source + if (parentId) { + attributes.Parent = [parentId] + } + if (attributes.gff_id) { + attributes.ID = attributes.gff_id + delete attributes.gff_id + } + if (attributes.gff_name) { + attributes.Name = attributes.gff_name + delete attributes.gff_name + } + if (attributes.gff_alias) { + attributes.Alias = attributes.gff_alias + delete attributes.gff_alias + } + if (attributes.gff_target) { + attributes.Target = attributes.gff_target + delete attributes.gff_target + } + if (attributes.gff_gap) { + attributes.Gap = attributes.gff_gap + delete attributes.gff_gap + } + if (attributes.gff_derives_from) { + attributes.Derives_from = attributes.gff_derives_from + delete attributes.gff_derives_from + } + if (attributes.gff_note) { + attributes.Note = attributes.gff_note + delete attributes.gff_note + } + if (attributes.gff_dbxref) { + attributes.Dbxref = attributes.gff_dbxref + delete attributes.gff_dbxref + } + if (attributes.gff_is_circular) { + attributes.Is_circular = attributes.gff_is_circular + delete attributes.gff_is_circular + } + if (attributes.gff_ontology_term) { + ontologyTerms.push(...attributes.gff_ontology_term) + delete attributes.gff_ontology_term + } + if (attributes['Gene Ontology']) { + ontologyTerms.push(...attributes['Gene Ontology']) + delete attributes['Gene Ontology'] + } + if (attributes['Sequence Ontology']) { + ontologyTerms.push(...attributes['Sequence Ontology']) + delete attributes['Sequence Ontology'] + } + if (ontologyTerms.length > 0) { + attributes.Ontology_term = ontologyTerms + } + + const gff_score = feature.attributes?.gff_score + let score = null + if (gff_score) { + if (gff_score.length == 1) { + score = Number(gff_score[0]) + } else { + throw new Error('Unexpected score') + } + } + delete attributes.gff_score + + return locations.map((location) => ({ + start: location.start + 1, + end: location.end, + seq_id: refSeqNames ? refSeqNames[feature.refSeq] ?? null : feature.refSeq, + source, + type: feature.type, + score, + strand: feature.strand ? (feature.strand === 1 ? '+' : '-') : null, + phase: null, + // phase: + // location.phase === 0 + // ? '0' + // : location.phase === 1 + // ? '1' + // : location.phase === 2 + // ? '2' + // : null, + attributes: Object.keys(attributes).length > 0 ? attributes : null, + derived_features: [], + child_features: feature.children + ? Object.values(feature.children).map((child) => + annotationFeatureToGFF3(child, attributes.ID?.[0], refSeqNames), + ) + : [], + })) +} diff --git a/packages/apollo-shared/src/GFF3/gff3ToAnnotationFeature.test.ts b/packages/apollo-shared/src/GFF3/gff3ToAnnotationFeature.test.ts index b6abc8f0a..64ed0c7ed 100644 --- a/packages/apollo-shared/src/GFF3/gff3ToAnnotationFeature.test.ts +++ b/packages/apollo-shared/src/GFF3/gff3ToAnnotationFeature.test.ts @@ -120,7 +120,9 @@ function readFeatureFile(fn: string): GFF3Feature[] { return inGff } -function readAnnotationFeatureSnapshot(fn: string): AnnotationFeatureSnapshot { +export function readAnnotationFeatureSnapshot( + fn: string, +): AnnotationFeatureSnapshot { const lines = readFileSync(fn).toString() return JSON.parse(lines) as AnnotationFeatureSnapshot } diff --git a/packages/apollo-shared/src/GFF3/index.ts b/packages/apollo-shared/src/GFF3/index.ts index fd9ae5fb0..45cec9a92 100644 --- a/packages/apollo-shared/src/GFF3/index.ts +++ b/packages/apollo-shared/src/GFF3/index.ts @@ -1,2 +1,3 @@ +export * from './annotationFeatureToGFF3' export * from './gffReservedKeys' export * from './gff3ToAnnotationFeature' diff --git a/packages/apollo-shared/src/util.ts b/packages/apollo-shared/src/util.ts index 36dda6c67..331a40a13 100644 --- a/packages/apollo-shared/src/util.ts +++ b/packages/apollo-shared/src/util.ts @@ -1,105 +1,3 @@ -/* eslint-disable @typescript-eslint/no-unsafe-assignment */ - -import { AnnotationFeatureSnapshot } from '@apollo-annotation/mst' -import { GFF3Feature } from '@gmod/gff' - -export function makeGFF3Feature( - feature: AnnotationFeatureSnapshot, - parentId?: string, - refSeqNames?: Record, -): GFF3Feature { - const locations = [{ start: feature.min, end: feature.max }] - // const locations = feature.discontinuousLocations?.length - // ? feature.discontinuousLocations - // : [{ start: feature.start, end: feature.end, phase: feature.phase }] - const attributes: Record = JSON.parse( - JSON.stringify(feature.attributes), - ) - const ontologyTerms: string[] = [] - const source = feature.attributes?.source?.[0] ?? null - delete attributes.source - if (parentId) { - attributes.Parent = [parentId] - } - if (attributes._id) { - attributes.ID = attributes._id - delete attributes._id - } - if (attributes.gff_name) { - attributes.Name = attributes.gff_name - delete attributes.gff_name - } - if (attributes.gff_alias) { - attributes.Alias = attributes.gff_alias - delete attributes.gff_alias - } - if (attributes.gff_target) { - attributes.Target = attributes.gff_target - delete attributes.gff_target - } - if (attributes.gff_gap) { - attributes.Gap = attributes.gff_gap - delete attributes.gff_gap - } - if (attributes.gff_derives_from) { - attributes.Derives_from = attributes.gff_derives_from - delete attributes.gff_derives_from - } - if (attributes.gff_note) { - attributes.Note = attributes.gff_note - delete attributes.gff_note - } - if (attributes.gff_dbxref) { - attributes.Dbxref = attributes.gff_dbxref - delete attributes.gff_dbxref - } - if (attributes.gff_is_circular) { - attributes.Is_circular = attributes.gff_is_circular - delete attributes.gff_is_circular - } - if (attributes.gff_ontology_term) { - ontologyTerms.push(...attributes.gff_ontology_term) - delete attributes.gff_ontology_term - } - if (attributes['Gene Ontology']) { - ontologyTerms.push(...attributes['Gene Ontology']) - delete attributes['Gene Ontology'] - } - if (attributes['Sequence Ontology']) { - ontologyTerms.push(...attributes['Sequence Ontology']) - delete attributes['Sequence Ontology'] - } - if (ontologyTerms.length > 0) { - attributes.Ontology_term = ontologyTerms - } - return locations.map((location) => ({ - start: location.start + 1, - end: location.end, - seq_id: refSeqNames ? refSeqNames[feature.refSeq] ?? null : feature.refSeq, - source, - type: feature.type, - score: null, - // score: feature.score ?? null, - strand: feature.strand ? (feature.strand === 1 ? '+' : '-') : null, - phase: null, - // phase: - // location.phase === 0 - // ? '0' - // : location.phase === 1 - // ? '1' - // : location.phase === 2 - // ? '2' - // : null, - attributes: Object.keys(attributes).length > 0 ? attributes : null, - derived_features: [], - child_features: feature.children - ? Object.values(feature.children).map((child) => - makeGFF3Feature(child, attributes.ID?.[0], refSeqNames), - ) - : [], - })) -} - export function splitStringIntoChunks( input: string, chunkSize: number, diff --git a/packages/apollo-shared/test_data/gene.json b/packages/apollo-shared/test_data/gene.json new file mode 100644 index 000000000..a7806d835 --- /dev/null +++ b/packages/apollo-shared/test_data/gene.json @@ -0,0 +1,69 @@ +{ + "_id": "66d70e4ccc30b55b65e5f619", + "refSeq": "chr1", + "type": "gene", + "min": 999, + "max": 9000, + "strand": 1, + "attributes": { + "gff_id": ["gene10001"], + "gff_name": ["EDEN"], + "gff_score": ["123"], + "gff_source": ["test_data"], + "testid": ["t003"] + }, + "children": { + "66d70e4ccc30b55b65e5f618": { + "_id": "66d70e4ccc30b55b65e5f618", + "refSeq": "chr1", + "type": "mRNA", + "min": 1049, + "max": 9000, + "strand": 1, + "children": { + "66d70e4ccc30b55b65e5f615": { + "_id": "66d70e4ccc30b55b65e5f615", + "refSeq": "chr1", + "type": "exon", + "min": 1049, + "max": 1500, + "strand": 1, + "attributes": { + "gff_id": ["exon10001"], + "testid": ["t007"] + } + }, + "66d70e4ccc30b55b65e5f616": { + "_id": "66d70e4ccc30b55b65e5f616", + "refSeq": "chr1", + "type": "exon", + "min": 4999, + "max": 5500, + "strand": 1, + "attributes": { + "gff_id": ["exon10004"], + "testid": ["t010"] + } + }, + "66d70e4ccc30b55b65e5f617": { + "_id": "66d70e4ccc30b55b65e5f617", + "refSeq": "chr1", + "type": "CDS", + "min": 1200, + "max": 5000, + "strand": 1, + "attributes": { + "gff_id": ["cds10001"], + "gff_name": ["edenprotein.1"], + "testid": ["t012", "t013", "t014", "t015"] + } + } + }, + "attributes": { + "gff_id": ["mRNA10001"], + "gff_name": ["EDEN.1"], + "testid": ["t004", "t001", "t004"] + } + } + } +} diff --git a/packages/jbrowse-plugin-apollo/src/BackendDrivers/DesktopFileDriver.ts b/packages/jbrowse-plugin-apollo/src/BackendDrivers/DesktopFileDriver.ts index 157e92a6a..fe21ccc90 100644 --- a/packages/jbrowse-plugin-apollo/src/BackendDrivers/DesktopFileDriver.ts +++ b/packages/jbrowse-plugin-apollo/src/BackendDrivers/DesktopFileDriver.ts @@ -10,9 +10,9 @@ import { } from '@apollo-annotation/mst' import { ValidationResultSet, - makeGFF3Feature, splitStringIntoChunks, } from '@apollo-annotation/shared' +import { annotationFeatureToGFF3 } from '@apollo-annotation/shared/src/GFF3/annotationFeatureToGFF3' import gff, { GFF3Item } from '@gmod/gff' import { getConf } from '@jbrowse/core/configuration' import { Region, getSession } from '@jbrowse/core/util' @@ -147,7 +147,7 @@ export class DesktopFileDriver extends BackendDriver { for (const [, refSeq] of clientAssembly.refSeqs) { const { features } = refSeq for (const [, feature] of features) { - gff3Items.push(makeGFF3Feature(getSnapshot(feature))) + gff3Items.push(annotationFeatureToGFF3(getSnapshot(feature))) } } for (const [, refSeq] of clientAssembly.refSeqs) { diff --git a/packages/jbrowse-plugin-apollo/src/components/DownloadGFF3.tsx b/packages/jbrowse-plugin-apollo/src/components/DownloadGFF3.tsx index 20fcd421e..6172ed258 100644 --- a/packages/jbrowse-plugin-apollo/src/components/DownloadGFF3.tsx +++ b/packages/jbrowse-plugin-apollo/src/components/DownloadGFF3.tsx @@ -4,7 +4,6 @@ /* eslint-disable @typescript-eslint/no-unnecessary-condition */ /* eslint-disable @typescript-eslint/no-misused-promises */ import { ApolloAssembly } from '@apollo-annotation/mst' -import { makeGFF3Feature } from '@apollo-annotation/shared' import gff, { GFF3Item } from '@gmod/gff' import { Assembly } from '@jbrowse/core/assemblyManager/assembly' import { getConf } from '@jbrowse/core/configuration' @@ -29,6 +28,7 @@ import { import { ApolloSessionModel } from '../session' import { createFetchErrorMessage } from '../util' import { Dialog } from './Dialog' +import { annotationFeatureToGFF3 } from '@apollo-annotation/shared/src/GFF3/annotationFeatureToGFF3' interface DownloadGFF3Props { session: ApolloSessionModel @@ -153,7 +153,7 @@ export function DownloadGFF3({ handleClose, session }: DownloadGFF3Props) { continue } for (const [, feature] of features) { - gff3Items.push(makeGFF3Feature(getSnapshot(feature))) + gff3Items.push(annotationFeatureToGFF3(getSnapshot(feature))) } } for (const sequenceFeature of sequenceFeatures) {