From 8326ecb92f64ceb2dfee18d689dd25a53e1361f6 Mon Sep 17 00:00:00 2001 From: ivan-aksamentov Date: Tue, 4 Jun 2024 02:24:08 +0200 Subject: [PATCH] fix(web): prevent writing large auspice json to local storage After `dataset-json-url` param is used, the entire auspice json was stored in local storage. For large files this would be bigger that what browsers allow and the store operation would fail. Subsequent reads would retrieve either nothing or some fragment of data. Here I add some logic to make sure we store auspice json in memory only. Sadly, this means that page reload wipes all the data and we currently don't have a good mechanism to store the previous dataset URL in order to re-fetch it. This will need to be implemented. Currently I opted for discarding the stored dataset if it was auspice dataset. This should be alright, as the feature was primarily designed to be used when navigating to Nextclade with url params set. This PR only implements a workaround for something that's fundamentally poorly designed and broken. The full dataset description object is still saved to local storage. The definitive fix requires serious restructuring and altering many components. It will be implemented later (https://github.com/nextstrain/nextclade/issues/1464) But this workaround should allow most use-cases of `dataset-json-url` to work and makes this auspice dataset feature releasable. Can be tested with a large tree like this: ``` ?dataset-json-url=https://nextstrain.org/ncov/gisaid/global/all-time ``` --- packages/nextclade-web/.eslintrc.js | 3 ++- .../nextclade-web/src/io/fetchDatasets.ts | 6 +---- .../src/io/fetchSingleDatasetAuspice.ts | 6 ++--- .../src/io/fetchSingleDatasetDirectory.ts | 14 +++++++--- packages/nextclade-web/src/pages/_app.tsx | 26 +++++++++++++------ packages/nextclade/src/io/dataset.rs | 11 ++++++++ 6 files changed, 46 insertions(+), 20 deletions(-) diff --git a/packages/nextclade-web/.eslintrc.js b/packages/nextclade-web/.eslintrc.js index f8d07bae4..b55fbdc6b 100644 --- a/packages/nextclade-web/.eslintrc.js +++ b/packages/nextclade-web/.eslintrc.js @@ -130,6 +130,7 @@ module.exports = { 'no-shadow': 'off', 'no-unused-vars': 'off', 'only-ascii/only-ascii': 'warn', + 'prefer-const': ['warn', { destructuring: 'all' }], 'prefer-for-of': 'off', 'prettier/prettier': 'warn', 'react-hooks/exhaustive-deps': [ @@ -148,7 +149,7 @@ module.exports = { 'react/state-in-constructor': 'off', 'security/detect-non-literal-fs-filename': 'off', 'security/detect-object-injection': 'off', - 'sonarjs/cognitive-complexity': ['warn', 20], + 'sonarjs/cognitive-complexity': 'off', 'unicorn/consistent-function-scoping': 'off', 'unicorn/escape-case': 'off', 'unicorn/filename-case': 'off', diff --git a/packages/nextclade-web/src/io/fetchDatasets.ts b/packages/nextclade-web/src/io/fetchDatasets.ts index 2804d8230..656ae638d 100644 --- a/packages/nextclade-web/src/io/fetchDatasets.ts +++ b/packages/nextclade-web/src/io/fetchDatasets.ts @@ -128,11 +128,7 @@ export async function initializeDatasets(datasetServerUrl: string, urlQuery: Par const minimizerIndexVersion = await getCompatibleMinimizerIndexVersion(datasetServerUrl, datasetsIndexJson) // Check if URL params specify dataset params and try to find the corresponding dataset - const currentDataset: - | (Dataset & { - auspiceJson?: AuspiceTree - }) - | undefined = await getDatasetFromUrlParams(urlQuery, datasets) + const currentDataset = await getDatasetFromUrlParams(urlQuery, datasets) return { datasets, currentDataset, minimizerIndexVersion } } diff --git a/packages/nextclade-web/src/io/fetchSingleDatasetAuspice.ts b/packages/nextclade-web/src/io/fetchSingleDatasetAuspice.ts index c7e43f99b..5666524d4 100644 --- a/packages/nextclade-web/src/io/fetchSingleDatasetAuspice.ts +++ b/packages/nextclade-web/src/io/fetchSingleDatasetAuspice.ts @@ -39,7 +39,7 @@ export async function fetchSingleDatasetAuspice(datasetJsonUrl_: string) { } } - const currentDataset: Dataset & { auspiceJson?: AuspiceTree } = { + const currentDataset: Dataset = { path: datasetJsonUrl, capabilities: { primers: false, @@ -50,8 +50,8 @@ export async function fetchSingleDatasetAuspice(datasetJsonUrl_: string) { name, ...pathogen?.attributes, }, + type: 'auspiceJson', version, - auspiceJson, } const datasets = [currentDataset] @@ -60,5 +60,5 @@ export async function fetchSingleDatasetAuspice(datasetJsonUrl_: string) { const defaultDatasetName = currentDatasetName const defaultDatasetNameFriendly = attrStrMaybe(currentDataset.attributes, 'name') ?? currentDatasetName - return { datasets, defaultDataset, defaultDatasetName, defaultDatasetNameFriendly, currentDataset } + return { datasets, defaultDataset, defaultDatasetName, defaultDatasetNameFriendly, currentDataset, auspiceJson } } diff --git a/packages/nextclade-web/src/io/fetchSingleDatasetDirectory.ts b/packages/nextclade-web/src/io/fetchSingleDatasetDirectory.ts index c51248f46..bfda9f71e 100644 --- a/packages/nextclade-web/src/io/fetchSingleDatasetDirectory.ts +++ b/packages/nextclade-web/src/io/fetchSingleDatasetDirectory.ts @@ -2,7 +2,7 @@ import axios from 'axios' import urljoin from 'url-join' import { mapValues } from 'lodash' import { concurrent } from 'fasy' -import { attrStrMaybe, AuspiceTree, Dataset, VirusProperties } from 'src/types' +import { attrStrMaybe, Dataset, VirusProperties } from 'src/types' import { removeTrailingSlash } from 'src/io/url' import { axiosFetch, axiosHead, axiosHeadOrUndefined } from 'src/io/axiosFetch' import { sanitizeError } from 'src/helpers/sanitizeError' @@ -17,7 +17,7 @@ export async function fetchSingleDatasetDirectory( const files = mapValues(pathogen.files, (file) => (file ? urljoin(datasetRootUrl, file) : file)) - const currentDataset: Dataset & { auspiceJson?: AuspiceTree } = { + const currentDataset: Dataset = { path: datasetRootUrl, capabilities: { primers: false, @@ -25,6 +25,7 @@ export async function fetchSingleDatasetDirectory( }, ...pathogen, files, + type: 'directory', } const datasets = [currentDataset] @@ -51,7 +52,14 @@ export async function fetchSingleDatasetDirectory( Object.entries(files).filter(([_, key]) => !['examples', 'readme'].includes(key)), ) - return { datasets, defaultDataset, defaultDatasetName, defaultDatasetNameFriendly, currentDataset } + return { + datasets, + defaultDataset, + defaultDatasetName, + defaultDatasetNameFriendly, + currentDataset, + auspiceJson: undefined, + } } async function fetchPathogenJson(datasetRootUrl: string) { diff --git a/packages/nextclade-web/src/pages/_app.tsx b/packages/nextclade-web/src/pages/_app.tsx index ca8a08609..01dcc5049 100644 --- a/packages/nextclade-web/src/pages/_app.tsx +++ b/packages/nextclade-web/src/pages/_app.tsx @@ -8,7 +8,6 @@ import { RecoilEnv, RecoilRoot, useRecoilCallback, useRecoilState, useRecoilValu import { AppProps } from 'next/app' import { useRouter } from 'next/router' import dynamic from 'next/dynamic' -import type { Dataset, AuspiceTree } from 'src/types' import { sanitizeError } from 'src/helpers/sanitizeError' import { useRunAnalysis } from 'src/hooks/useRunAnalysis' import i18nAuspice, { changeAuspiceLocale } from 'src/i18n/i18n.auspice' @@ -97,10 +96,10 @@ function RecoilStateInitializer() { const datasetInfo = await fetchSingleDataset(urlQuery) if (!isNil(datasetInfo)) { - const { datasets, currentDataset } = datasetInfo - return { datasets, currentDataset, minimizerIndexVersion: undefined } + const { datasets, currentDataset, auspiceJson } = datasetInfo + return { datasets, currentDataset, minimizerIndexVersion: undefined, auspiceJson } } - return { datasets, currentDataset, minimizerIndexVersion } + return { datasets, currentDataset, minimizerIndexVersion, auspiceJson: undefined } }) .catch((error) => { // Dataset error is fatal and we want error to be handled in the ErrorBoundary @@ -108,13 +107,24 @@ function RecoilStateInitializer() { set(globalErrorAtom, sanitizeError(error)) throw error }) - .then(async ({ datasets, currentDataset, minimizerIndexVersion }) => { + .then(async ({ datasets, currentDataset, minimizerIndexVersion, auspiceJson }) => { set(datasetsAtom, { datasets }) - const previousDataset = await getPromise(datasetCurrentAtom) - const dataset: (Dataset & { auspiceJson?: AuspiceTree }) | undefined = currentDataset ?? previousDataset + let previousDataset = await getPromise(datasetCurrentAtom) + if (previousDataset?.type === 'auspiceJson') { + // Disregard previously saved dataset if it's Auspice dataset, because the data is no longer available. + // We might re-fetch instead, but need to persist URL for that somehow. + previousDataset = undefined + } + + const dataset = currentDataset ?? previousDataset set(datasetCurrentAtom, dataset) set(minimizerIndexVersionAtom, minimizerIndexVersion) - set(datasetJsonAtom, dataset?.auspiceJson) + + if (dataset?.type === 'auspiceJson' && !isNil(auspiceJson)) { + set(datasetJsonAtom, auspiceJson) + } else { + set(datasetJsonAtom, undefined) + } return dataset }) .then(async (dataset) => { diff --git a/packages/nextclade/src/io/dataset.rs b/packages/nextclade/src/io/dataset.rs index ff5c15f23..52a67b520 100644 --- a/packages/nextclade/src/io/dataset.rs +++ b/packages/nextclade/src/io/dataset.rs @@ -78,6 +78,9 @@ pub struct Dataset { #[serde(default, skip_serializing_if = "DatasetVersion::is_empty")] pub version: DatasetVersion, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub r#type: Option, + #[serde(flatten)] pub other: serde_json::Value, } @@ -386,3 +389,11 @@ pub struct MinimizerIndexVersion { #[serde(flatten)] pub other: serde_json::Value, } + +#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub enum DatasetType { + Directory, + AuspiceJson, + Other, +}