From ec09aa0fca0eddfcf5ee0c06ed17291faa845263 Mon Sep 17 00:00:00 2001 From: Colin Date: Tue, 9 Jul 2024 01:22:17 -0400 Subject: [PATCH] Misc refactoring --- eslint.config.mjs | 7 +- .../components/AlphaFoldDBSearch.tsx | 16 +- .../components/AlphaFoldDBSearchStatus.tsx | 28 +++- .../components/HelpDialog.tsx | 43 ++++-- .../components/LaunchProteinViewDialog.tsx | 2 +- src/LaunchProteinView/components/MSATable.tsx | 100 ++++++++---- .../components/TranscriptSelector.tsx | 3 +- .../components/UserProvidedStructure.tsx | 146 +++++++++++------- .../useLocalStructureFileSequence.ts | 14 +- .../useMyGeneInfoUniprotIdLookup.ts | 4 +- src/LaunchProteinView/components/util.ts | 6 +- src/ProteinView/components/SplitString.tsx | 2 +- src/ProteinView/model.ts | 3 +- 13 files changed, 241 insertions(+), 133 deletions(-) diff --git a/eslint.config.mjs b/eslint.config.mjs index 9da1e7d..a7698bb 100644 --- a/eslint.config.mjs +++ b/eslint.config.mjs @@ -87,7 +87,12 @@ export default [ ignoreRestSiblings: true, }, ], - + 'no-console': [ + 'warn', + { + allow: ['error', 'warn'], + }, + ], curly: 'error', 'no-extra-semi': 'off', 'unicorn/no-negated-condition': 'off', diff --git a/src/LaunchProteinView/components/AlphaFoldDBSearch.tsx b/src/LaunchProteinView/components/AlphaFoldDBSearch.tsx index 7b09b89..3a2c021 100644 --- a/src/LaunchProteinView/components/AlphaFoldDBSearch.tsx +++ b/src/LaunchProteinView/components/AlphaFoldDBSearch.tsx @@ -1,6 +1,6 @@ import React, { useEffect, useState } from 'react' import { observer } from 'mobx-react' -import { Button, DialogActions, DialogContent } from '@mui/material' +import { Button, DialogActions, DialogContent, Typography } from '@mui/material' import { makeStyles } from 'tss-react/mui' import { AbstractTrackModel, @@ -73,6 +73,7 @@ const AlphaFoldDBSearch = observer(function ({ ? getDisplayName(selectedTranscript) : getDisplayName(feature), }) + const url = uniprotId ? 
`https://alphafold.ebi.ac.uk/files/AF-${uniprotId}-F1-model_v4.cif` : undefined @@ -86,10 +87,6 @@ const AlphaFoldDBSearch = observer(function ({ useEffect(() => { if (isoformSequences !== undefined) { - console.log( - { structureSequence }, - options.find(f => isoformSequences[f.id()]?.seq == structureSequence), - ) const ret = options.find( f => @@ -98,16 +95,16 @@ const AlphaFoldDBSearch = observer(function ({ ) ?? options.find(f => !!isoformSequences[f.id()]) setUserSelection(ret?.id()) } - }, [options, structureSequence, userSelection, isoformSequences]) + }, [options, structureSequence, isoformSequences]) return ( <> {e ? : null} -
+ Automatically find AlphaFoldDB entry for given transcript{' '} -
+ {isRemoteStructureSequenceLoading ? ( @@ -57,11 +58,24 @@ export default function AlphaFoldDBSearchStatus({ {structureSequence ? ( - +
+ + {showAllProteinSequences ? ( + + ) : null} +
) : ( )} diff --git a/src/LaunchProteinView/components/HelpDialog.tsx b/src/LaunchProteinView/components/HelpDialog.tsx index 1fadd9c..e4d3104 100644 --- a/src/LaunchProteinView/components/HelpDialog.tsx +++ b/src/LaunchProteinView/components/HelpDialog.tsx @@ -18,26 +18,41 @@ export default function HelpDialog({ handleClose: () => void }) { return ( - + - The automatic lookup performs the following steps: -
    + The procedure for the protein lookup is as follows: +
    • - searches mygene.info for the transcript ID, in order to retrieve - the UniProt ID associated with a given transcript ID + (Automatic lookup) Searches mygene.info for the transcript ID, in + order to retrieve the UniProt ID associated with a given + transcript ID and then, the UniProt ID is used to lookup the + structure in AlphaFoldDB
    • - Then, it uses that UniProt ID to lookup the structure in - AlphaFoldDB because every UniProt ID has been processed by - AlphaFold. + (Manual) Allows you to choose your own structure file from your + local machine (e.g. a PDB file predicted by ColabFold) or + supply a specific URL
    • -
+
  • + The residues from the structure are downloaded, and then you can + choose the transcript isoform from the selected gene that best + represents the structure. Asterisks are displayed if there is an + exact sequence match +
  • +
  • + The residues from the structure are finally aligned to the + selected transcript's protein sequence representation, and + this creates a mapping from the reference genome coordinates to + positions in the 3-D structure +
  • +
  • + Finally the molstar panel is opened, and this contains many + specialized features, plus additional mouseover and + selection features supplied by the plugin to connect mouse click + actions and mouse hover with coordinates on the linear genome view +
  • +
    If you run into challenges with this workflow e.g. your transcripts diff --git a/src/LaunchProteinView/components/LaunchProteinViewDialog.tsx b/src/LaunchProteinView/components/LaunchProteinViewDialog.tsx index 9f8dcc2..d9b5464 100644 --- a/src/LaunchProteinView/components/LaunchProteinViewDialog.tsx +++ b/src/LaunchProteinView/components/LaunchProteinViewDialog.tsx @@ -26,7 +26,7 @@ export default function LaunchProteinViewDialog({ open > setChoice(val)}> - + diff --git a/src/LaunchProteinView/components/MSATable.tsx b/src/LaunchProteinView/components/MSATable.tsx index a86c365..1d5efc9 100644 --- a/src/LaunchProteinView/components/MSATable.tsx +++ b/src/LaunchProteinView/components/MSATable.tsx @@ -1,5 +1,5 @@ -import React from 'react' -import { TextField } from '@mui/material' +import React, { useState } from 'react' +import { Checkbox, FormControlLabel, TextField } from '@mui/material' import { Feature, max } from '@jbrowse/core/util' import { makeStyles } from 'tss-react/mui' @@ -11,6 +11,9 @@ const useStyles = makeStyles()({ fontFamily: 'Courier New', whiteSpace: 'pre', }, + margin: { + marginLeft: 20, + }, }) export default function MSATable({ @@ -23,42 +26,71 @@ export default function MSATable({ isoformSequences: Record }) { const { classes } = useStyles() - const exactMatchIsoformAndStructureSeq = Object.entries( - isoformSequences, - ).find(([_, val]) => structureSequence === val.seq.replace('*', '')) + const [showInFastaFormat, setShowInFastaFormat] = useState(false) + const removedStars = Object.fromEntries( + Object.entries(isoformSequences).map(([key, val]) => [ + key, + { ...val, seq: val.seq.replaceAll('*', '') }, + ]), + ) + const exactMatchIsoformAndStructureSeq = Object.entries(removedStars).find( + ([_, val]) => structureSequence === val.seq, + ) + const sname = `${structureName || ''} (structure residues)` const maxKeyLen = max([ - structureName?.length ?? 
0, - ...Object.entries(isoformSequences).map( + sname.length, + ...Object.entries(removedStars).map( ([_, val]) => getTranscriptDisplayName(val.feature).length, ), ]) + + const l1 = [ + `${sname.padEnd(maxKeyLen)}${exactMatchIsoformAndStructureSeq ? '*' : ' '} ${structureSequence}`, + exactMatchIsoformAndStructureSeq + ? `${getTranscriptDisplayName(exactMatchIsoformAndStructureSeq[1].feature).padEnd(maxKeyLen)}* ${exactMatchIsoformAndStructureSeq[1].seq}` + : undefined, + ...Object.entries(removedStars) + .map( + ([_, val]) => + `${getTranscriptDisplayName(val.feature).padEnd(maxKeyLen)} ${val.seq}`, + ) + .filter(([k]) => k !== exactMatchIsoformAndStructureSeq?.[0]), + ] + .filter(f => !!f) + .join('\n') + + const l2 = [ + `>${sname}\n${structureSequence}`, + ...Object.values(removedStars).map( + ({ feature, seq }) => `>${getTranscriptDisplayName(feature)}\n${seq}`, + ), + ].join('\n') return ( - - `${getTranscriptDisplayName(val.feature).padEnd(maxKeyLen)} ${val.seq}`, - ) - .filter(([k]) => k !== exactMatchIsoformAndStructureSeq?.[0]), - ] - .filter(f => !!f) - .join('\n')} - InputProps={{ - readOnly: true, - classes: { - input: classes.textAreaFont, - }, - }} - /> + <> + setShowInFastaFormat(event.target.checked)} + checked={showInFastaFormat} + /> + } + label="Show in FASTA format?" 
+ /> + + ) } diff --git a/src/LaunchProteinView/components/TranscriptSelector.tsx b/src/LaunchProteinView/components/TranscriptSelector.tsx index 47fa23e..14a7281 100644 --- a/src/LaunchProteinView/components/TranscriptSelector.tsx +++ b/src/LaunchProteinView/components/TranscriptSelector.tsx @@ -45,7 +45,8 @@ export default function TranscriptSelector({ .map(f => ( {getGeneDisplayName(feature)} - {getTranscriptDisplayName(f)} ( - {isoformSequences[f.id()].seq.length}aa) matches structure sequence* + {isoformSequences[f.id()].seq.length}aa) (matches structure + residues) ))} {isoforms diff --git a/src/LaunchProteinView/components/UserProvidedStructure.tsx b/src/LaunchProteinView/components/UserProvidedStructure.tsx index 77c0c6c..4c4ee11 100644 --- a/src/LaunchProteinView/components/UserProvidedStructure.tsx +++ b/src/LaunchProteinView/components/UserProvidedStructure.tsx @@ -3,13 +3,13 @@ import { observer } from 'mobx-react' import { Button, DialogActions, - Radio, - RadioGroup, DialogContent, - TextField, FormControlLabel, FormControl, Link, + Radio, + RadioGroup, + TextField, Typography, } from '@mui/material' import { makeStyles } from 'tss-react/mui' @@ -29,9 +29,11 @@ import { getTranscriptDisplayName, getTranscriptFeatures, } from './util' + // components import TranscriptSelector from './TranscriptSelector' import MSATable from './MSATable' +import HelpButton from './HelpButton' // hooks import useIsoformProteinSequences from './useIsoformProteinSequences' @@ -77,27 +79,23 @@ const UserProvidedStructure = observer(function ({ const { classes } = useStyles() const session = getSession(model) const [file, setFile] = useState() + const [pdbId, setPdbId] = useState('') const [choice, setChoice] = useState('file') const [error2, setError] = useState() const [structureURL, setStructureURL] = useState('') - const [selection, setSelection] = useState() + const [userSelection, setUserSelection] = useState() + const [showAllProteinSequences, 
setShowAllProteinSequences] = useState(false) // check if we are looking at a 'two-level' or 'three-level' feature by // finding exon/CDS subfeatures. we want to select from transcript names const options = getTranscriptFeatures(feature) const view = getContainingView(model) as LGV - const selectedTranscript = options.find(val => getId(val) === selection) + const selectedTranscript = options.find(val => getId(val) === userSelection) const { isoformSequences, error } = useIsoformProteinSequences({ feature, view, }) - const protein = isoformSequences?.[selection ?? ''] - useEffect(() => { - if (selection === undefined && isoformSequences !== undefined) { - setSelection(options.find(f => !!isoformSequences[f.id()])?.id()) - } - }, [options, selection, isoformSequences]) - + const protein = isoformSequences?.[userSelection ?? ''] const { seq: structureSequence1, error: error3 } = useLocalStructureFileSequence({ file }) @@ -109,36 +107,30 @@ const UserProvidedStructure = observer(function ({ 'structureSequence' const structureSequence = structureSequence1 ?? structureSequence2 + useEffect(() => { + if (isoformSequences !== undefined) { + const ret = + options.find( + f => + isoformSequences[f.id()]?.seq.replaceAll('*', '') == + structureSequence, + ) ?? options.find(f => !!isoformSequences[f.id()]) + setUserSelection(ret?.id()) + } + }, [options, structureSequence, isoformSequences]) + const e = error || error2 || error3 || error4 return ( <> {e ? : null} - {isoformSequences ? ( - structureSequence ? ( - <> - - - - ) : null - ) : ( -
    - -
    - )} +
    + + Open your structure file + + } label="URL" /> } label="File" /> + } + label="PDB ID" + /> {choice === 'url' ? ( @@ -180,6 +177,56 @@ const UserProvidedStructure = observer(function ({
    ) : null} + {choice === 'pdb' ? ( + { + const s = event.target.value + setPdbId(s) + setStructureURL(`https://files.rcsb.org/download/${s}.cif`) + }} + label="PDB ID" + /> + ) : null} + +
    + {isoformSequences ? ( + structureSequence ? ( + <> + +
    + + + {showAllProteinSequences ? ( + + ) : null} +
    + + ) : null + ) : ( + + )}
    @@ -198,26 +245,15 @@ const UserProvidedStructure = observer(function ({ // eslint-disable-next-line @typescript-eslint/no-floating-promises ;(async () => { try { - if (file) { - const data = await file.text() - session.addView('ProteinView', { - type: 'ProteinView', - data, - seq2: protein, - feature: selectedTranscript?.toJSON(), - connectedViewId: view.id, - displayName: `Protein view ${getGeneDisplayName(feature)} - ${getTranscriptDisplayName(selectedTranscript)}`, - }) - } else if (structureURL) { - session.addView('ProteinView', { - type: 'ProteinView', - url: structureURL, - seq2: protein, - feature: selectedTranscript?.toJSON(), - connectedViewId: view.id, - displayName: `Protein view ${getGeneDisplayName(feature)} - ${getTranscriptDisplayName(selectedTranscript)}`, - }) - } + session.addView('ProteinView', { + type: 'ProteinView', + seq2: protein, + feature: selectedTranscript?.toJSON(), + connectedViewId: view.id, + displayName: `Protein view ${getGeneDisplayName(feature)} - ${getTranscriptDisplayName(selectedTranscript)}`, + ...(file ? { data: await file.text() } : {}), + ...(structureURL ? 
{ url: structureURL } : {}), + }) handleClose() } catch (e) { console.error(e) diff --git a/src/LaunchProteinView/components/useLocalStructureFileSequence.ts b/src/LaunchProteinView/components/useLocalStructureFileSequence.ts index 3ad727e..f145a68 100644 --- a/src/LaunchProteinView/components/useLocalStructureFileSequence.ts +++ b/src/LaunchProteinView/components/useLocalStructureFileSequence.ts @@ -3,14 +3,17 @@ import { createPluginUI } from 'molstar/lib/mol-plugin-ui' import { renderReact18 } from 'molstar/lib/mol-plugin-ui/react18' import { loadStructureFromData } from '../../ProteinView/loadStructureFromData' -async function structureFileSequenceFetcher(file: File) { +async function structureFileSequenceFetcher( + file: File, + format: 'pdb' | 'mmcif', +) { const ret = document.createElement('div') const p = await createPluginUI({ target: ret, render: renderReact18, }) const data = await file.text() - const { seq } = await loadStructureFromData({ data, plugin: p }) + const { seq } = await loadStructureFromData({ data, plugin: p, format }) p.unmount() ret.remove() return seq @@ -30,7 +33,12 @@ export default function useLocalStructureFileSequence({ try { if (file) { setLoading(true) - const seq = await structureFileSequenceFetcher(file) + + const ext = file.name.slice(file.name.lastIndexOf('.') + 1) || 'pdb' + const seq = await structureFileSequenceFetcher( + file, + (ext === 'cif' ? 
'mmcif' : ext) as 'pdb' | 'mmcif', + ) setSeq(seq) } } catch (e) { diff --git a/src/LaunchProteinView/components/useMyGeneInfoUniprotIdLookup.ts b/src/LaunchProteinView/components/useMyGeneInfoUniprotIdLookup.ts index 8bfb76f..10a995b 100644 --- a/src/LaunchProteinView/components/useMyGeneInfoUniprotIdLookup.ts +++ b/src/LaunchProteinView/components/useMyGeneInfoUniprotIdLookup.ts @@ -34,11 +34,9 @@ export default function useMyGeneInfo({ id }: { id: string }) { })() }, [id]) - console.log(result?.hits[0]?.uniprot) - return { isLoading, - uniprotId: result?.hits[0]?.uniprot['Swiss-Prot'], + uniprotId: result?.hits[0]?.uniprot?.['Swiss-Prot'], error, } } diff --git a/src/LaunchProteinView/components/util.ts b/src/LaunchProteinView/components/util.ts index 377b718..a3bd787 100644 --- a/src/LaunchProteinView/components/util.ts +++ b/src/LaunchProteinView/components/util.ts @@ -20,7 +20,7 @@ export function z(n: number) { } export function getDisplayName(f: Feature): string { - return f.get('id') + return f.get('name') || f.get('id') } export function getId(val?: Feature): string { @@ -30,13 +30,13 @@ export function getId(val?: Feature): string { export function getTranscriptDisplayName(val?: Feature): string { return val === undefined ? '' - : [val.get('name'), val.get('id')].filter(f => !!f).join(' ') + : [val.get('name') || val.get('id')].filter(f => !!f).join(' ') } export function getGeneDisplayName(val?: Feature): string { return val === undefined ? '' - : [val.get('gene_name') || val.get('name'), val.get('id')] + : [val.get('gene_name') || val.get('name') || val.get('id')] .filter(f => !!f) .join(' ') } diff --git a/src/ProteinView/components/SplitString.tsx b/src/ProteinView/components/SplitString.tsx index bac1d6c..0e5daa8 100644 --- a/src/ProteinView/components/SplitString.tsx +++ b/src/ProteinView/components/SplitString.tsx @@ -23,7 +23,7 @@ export default function SplitString({ style={{ background: col !== undefined && i === col - ? '#f69' + ? 
'#f698' : set?.has(i) && showHighlight ? '#33ff19' : undefined, diff --git a/src/ProteinView/model.ts b/src/ProteinView/model.ts index c571a1a..cee60ee 100644 --- a/src/ProteinView/model.ts +++ b/src/ProteinView/model.ts @@ -32,6 +32,7 @@ type MaybeLGV = LGV | undefined function stateModelFactory() { return types .compose( + 'ProteinView', BaseViewModel, types.model({ /** @@ -319,7 +320,7 @@ function stateModelFactory() { })) .actions(self => ({ afterAttach() { - // pairwise align transcript sequence to structure sequence + // pairwise align transcript sequence to structure residues addDisposer( self, autorun(async () => {