diff --git a/package-lock.json b/package-lock.json
index e1270a4..20cc314 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -6596,9 +6596,9 @@
       "license": "Apache-2.0"
     },
     "node_modules/express": {
-      "version": "4.21.1",
-      "resolved": "https://registry.npmjs.org/express/-/express-4.21.1.tgz",
-      "integrity": "sha512-YSFlK1Ee0/GC8QaO91tHcDxJiE/X4FbpAyQWkxAvG6AXCuR65YzK8ua6D9hvi/TzUfZMpc+BwuM1IPw8fmQBiQ==",
+      "version": "4.21.2",
+      "resolved": "https://registry.npmjs.org/express/-/express-4.21.2.tgz",
+      "integrity": "sha512-28HqgMZAmih1Czt9ny7qr6ek2qddF4FclbMzwhCREB6OFfH+rXAnuNCwo1/wFvrtbgsQDb4kSbX9de9lFbrXnA==",
       "license": "MIT",
       "dependencies": {
         "accepts": "~1.3.8",
@@ -6620,7 +6620,7 @@
         "methods": "~1.1.2",
         "on-finished": "2.4.1",
         "parseurl": "~1.3.3",
-        "path-to-regexp": "0.1.10",
+        "path-to-regexp": "0.1.12",
         "proxy-addr": "~2.0.7",
         "qs": "6.13.0",
         "range-parser": "~1.2.1",
@@ -6635,6 +6635,10 @@
       },
       "engines": {
         "node": ">= 0.10.0"
+      },
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/express"
       }
     },
     "node_modules/express-rate-limit": {
@@ -12150,9 +12154,10 @@
       }
     },
     "node_modules/path-to-regexp": {
-      "version": "0.1.10",
-      "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-0.1.10.tgz",
-      "integrity": "sha512-7lf7qcQidTku0Gu3YDPc8DJ1q7OOucfa/BSsIwjuh56VU7katFvuM8hULfkwB3Fns/rsVF7PwPKVw1sl5KQS9w=="
+      "version": "0.1.12",
+      "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-0.1.12.tgz",
+      "integrity": "sha512-RA1GjUVMnvYFxuqovrEqZoxxW5NUZqbwKtYz/Tt7nXerk0LbLblQmrsgdeOxV5SFHf0UDggjS/bSeOZwt1pmEQ==",
+      "license": "MIT"
     },
     "node_modules/path-type": {
       "version": "4.0.0",
diff --git a/src/app.ts b/src/app.ts
index 00978c9..b0f2124 100644
--- a/src/app.ts
+++ b/src/app.ts
@@ -19,6 +19,7 @@ import { providerRouter } from './route/provider';
 import { topicRouter } from './route/topic';
 import { organisationRouter } from './route/organisation';
 import { teamRouter } from './route/team';
+import { translationRouter } from './route/translation';
 
 export const initDb = async (): Promise<DatabaseManager> => {
     const dbManager = new DatabaseManager(logger);
@@ -57,6 +58,7 @@ app.use('/provider', rateLimiter, passport.authenticate('jwt', { session: false
 app.use('/topic', rateLimiter, passport.authenticate('jwt', { session: false }), topicRouter);
 app.use('/organisation', rateLimiter, passport.authenticate('jwt', { session: false }), organisationRouter);
 app.use('/team', rateLimiter, passport.authenticate('jwt', { session: false }), teamRouter);
+app.use('/translation', rateLimiter, passport.authenticate('jwt', { session: false }), translationRouter);
 
 app.use(errorHandler);
 
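The new `/translation` routes are mounted behind the same `rateLimiter` and passport JWT guard as every sibling router, so anonymous requests never reach the handlers. A minimal sketch of checking that guard with the jest + supertest setup already used by this repo's tests; the `app` export style and the `getAuthHeader` helper location are assumptions:

```ts
import request from 'supertest';

import app from '../src/app'; // assumed default export; adjust to the real export
import { getAuthHeader } from './helpers'; // assumed location of the helper seen in publisher-journey.test.ts

test('translation routes sit behind the JWT guard', async () => {
    // no Authorization header: passport's JWT strategy should reject the request
    const res = await request(app).get('/translation/some-dataset-id/preview');
    expect(res.status).toBe(401);
});
```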
diff --git a/src/dtos/tasklist-state-dto.ts b/src/dtos/tasklist-state-dto.ts
index bae5b6b..9755ea1 100644
--- a/src/dtos/tasklist-state-dto.ts
+++ b/src/dtos/tasklist-state-dto.ts
@@ -1,10 +1,12 @@
+import { every } from 'lodash';
+
 import { Dataset } from '../entities/dataset/dataset';
 import { DimensionInfo } from '../entities/dataset/dimension-info';
 import { DimensionType } from '../enums/dimension-type';
 import { TaskStatus } from '../enums/task-status';
+import { translatableMetadataKeys } from '../types/translatable-metadata';
 
 import { DimensionStatus } from './dimension-status';
-import { MeasureDTO } from './measure-dto';
 
 export class TasklistStateDTO {
     datatable: TaskStatus;
@@ -33,6 +35,17 @@
         when: TaskStatus;
     };
 
+    public static translationStatus(dataset: Dataset): TaskStatus {
+        const metaFullyTranslated = dataset.datasetInfo?.every((info) => {
+            return every(translatableMetadataKeys, (key) => {
+                // ignore roundingDescription if rounding isn't applied, otherwise check some data exists
+                return key === 'roundingDescription' && !info.roundingApplied ? true : Boolean(info[key]);
+            });
+        });
+
+        return metaFullyTranslated ? TaskStatus.Completed : TaskStatus.Incomplete;
+    }
+
     public static fromDataset(dataset: Dataset, lang: string): TasklistStateDTO {
         const info = dataset.datasetInfo?.find((info) => info.language === lang);
 
@@ -83,9 +96,14 @@
             relevant_topics: dataset.datasetTopics?.length > 0 ? TaskStatus.Completed : TaskStatus.NotStarted
         };
 
+        const dimensionsComplete = every(dimensions, (dim) => dim.status === TaskStatus.Completed);
+        const metadataComplete = every(dto.metadata, (status) => status === TaskStatus.Completed);
+
+        // TODO: export should check for dimensionsComplete as well
+        // TODO: import should check export complete and nothing was updated since the export (needs audit table)
         dto.translation = {
-            export: TaskStatus.NotImplemented,
-            import: TaskStatus.NotImplemented
+            export: metadataComplete ? TaskStatus.Available : TaskStatus.CannotStart,
+            import: metadataComplete ? TasklistStateDTO.translationStatus(dataset) : TaskStatus.CannotStart
         };
 
         dto.publishing = {
diff --git a/src/dtos/translations-dto.ts b/src/dtos/translations-dto.ts
new file mode 100644
index 0000000..1019dd7
--- /dev/null
+++ b/src/dtos/translations-dto.ts
@@ -0,0 +1,7 @@
+export class TranslationDTO {
+    type: string;
+    key: string;
+    id?: string;
+    english?: string;
+    cymraeg?: string;
+}
diff --git a/src/enums/task-status.ts b/src/enums/task-status.ts
index 6e8666b..b2f43a4 100644
--- a/src/enums/task-status.ts
+++ b/src/enums/task-status.ts
@@ -1,5 +1,8 @@
 export enum TaskStatus {
+    CannotStart = 'cannot_start',
+    Available = 'available',
     NotStarted = 'not_started',
+    Incomplete = 'incomplete',
     Completed = 'completed',
     NotImplemented = 'not_implemented'
 }
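The tasklist's new import status is derived entirely from the `DatasetInfo` rows: `Completed` only when every translatable key holds a value in every language, with `roundingDescription` exempted whenever rounding isn't applied. A self-contained worked example of that rule, with plain objects standing in for the entities (field names taken from the diff, values illustrative):

```ts
import { every } from 'lodash';

const translatableMetadataKeys = ['title', 'description', 'collection', 'quality', 'roundingDescription'] as const;

const infos = [
    { language: 'en-GB', title: 'Births', description: 'Live births', collection: 'Annual', quality: 'High', roundingApplied: false, roundingDescription: '' },
    { language: 'cy-GB', title: 'Genedigaethau', description: 'Genedigaethau byw', collection: 'Blynyddol', quality: 'Uchel', roundingApplied: false, roundingDescription: '' }
];

const metaFullyTranslated = infos.every((info) =>
    every(translatableMetadataKeys, (key) =>
        // roundingDescription is skipped here because roundingApplied is false
        key === 'roundingDescription' && !info.roundingApplied ? true : Boolean(info[key])
    )
);

console.log(metaFullyTranslated); // true => TaskStatus.Completed
```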
diff --git a/src/repositories/dataset.ts b/src/repositories/dataset.ts
index d0cc3a8..faec575 100644
--- a/src/repositories/dataset.ts
+++ b/src/repositories/dataset.ts
@@ -1,4 +1,4 @@
-import { DeepPartial, FindOneOptions, FindOptionsRelations } from 'typeorm';
+import { FindOneOptions, FindOptionsRelations } from 'typeorm';
 import { has } from 'lodash';
 
 import { dataSource } from '../db/data-source';
@@ -13,6 +13,8 @@ import { DatasetProviderDTO } from '../dtos/dataset-provider-dto';
 import { DatasetProvider } from '../entities/dataset/dataset-provider';
 import { DatasetTopic } from '../entities/dataset/dataset-topic';
 import { Team } from '../entities/user/team';
+import { TranslationDTO } from '../dtos/translations-dto';
+import { DimensionInfo } from '../entities/dataset/dimension-info';
 
 const defaultRelations: FindOptionsRelations<Dataset> = {
     createdBy: true,
@@ -60,15 +62,19 @@
         await this.delete({ id });
     },
 
-    async createWithTitle(user: User, language?: string, title?: string): Promise<Dataset> {
+    async createWithTitle(user: User, language: Locale, title: string): Promise<Dataset> {
         logger.debug(`Creating new Dataset...`);
         const dataset = await this.create({ createdBy: user }).save();
+        const altLang = language.includes('en') ? Locale.WelshGb : Locale.EnglishGb;
 
-        if (language && title) {
-            logger.debug(`Creating new DatasetInfo with language "${language}" and title "${title}"...`);
-            const datasetInfo = await dataSource.getRepository(DatasetInfo).create({ dataset, language, title }).save();
-            dataset.datasetInfo = [datasetInfo];
-        }
+        logger.debug(`Creating new DatasetInfo with language "${language}" and title "${title}"...`);
+        const datasetInfo = await dataSource.getRepository(DatasetInfo).create({ dataset, language, title }).save();
+        const altLangDatasetInfo = await dataSource
+            .getRepository(DatasetInfo)
+            .create({ dataset, language: altLang })
+            .save();
+
+        dataset.datasetInfo = [datasetInfo, altLangDatasetInfo];
 
         return this.getById(dataset.id);
     },
@@ -183,6 +189,49 @@
         const team = await dataSource.getRepository(Team).findOneByOrFail({ id: teamId });
         dataset.team = team;
         await dataset.save();
+
         return this.getById(datasetId);
+    },
+
+    async updateTranslations(datasetId: string, translations: TranslationDTO[]): Promise<Dataset> {
+        const dataset = await this.findOneOrFail({ where: { id: datasetId } });
+        const dimensionInfoRepo = dataSource.getRepository(DimensionInfo);
+        const infoRepo = dataSource.getRepository(DatasetInfo);
+
+        const dimensionTranslations = translations.filter((t) => t.type === 'dimension');
+
+        logger.debug(`Updating dimension names...`);
+
+        for (const row of dimensionTranslations) {
+            const englishDimInfo = await dimensionInfoRepo.findOneByOrFail({ id: row.id, language: Locale.EnglishGb });
+            englishDimInfo.name = row.english || '';
+            await englishDimInfo.save();
+
+            const welshDimInfo = await dimensionInfoRepo.findOneByOrFail({ id: row.id, language: Locale.WelshGb });
+            welshDimInfo.name = row.cymraeg || '';
+            await welshDimInfo.save();
+        }
+
+        const metaTranslations = translations.filter((t) => t.type === 'metadata');
+
+        logger.debug(`Updating metadata...`);
+
+        const englishInfo =
+            (await infoRepo.findOneBy({ dataset, language: Locale.EnglishGb })) ||
+            infoRepo.create({ dataset, language: Locale.EnglishGb });
+
+        const welshInfo =
+            (await infoRepo.findOneBy({ dataset, language: Locale.WelshGb })) ||
+            infoRepo.create({ ...englishInfo, language: Locale.WelshGb });
+
+        metaTranslations.forEach((row) => {
+            const metaKey = row.key as 'title' | 'description' | 'collection' | 'quality' | 'roundingDescription';
+            englishInfo[metaKey] = row.english || '';
+            welshInfo[metaKey] = row.cymraeg || '';
+        });
+
+        await englishInfo.save();
+        await welshInfo.save();
+
+        return this.getById(datasetId);
     }
 });
diff --git a/src/resources/locales/en.json b/src/resources/locales/en.json
index 5fa8deb..bc58189 100644
--- a/src/resources/locales/en.json
+++ b/src/resources/locales/en.json
@@ -69,6 +69,9 @@
         "publish_at": {
             "invalid": "Publish at is missing or invalid",
             "in_past": "Publish at cannot be in the past"
+        },
+        "translation_file": {
+            "invalid": "Uploaded translation file keys do not match the exported file"
         }
     },
     "dimension_info": {
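`updateTranslations` consumes rows shaped like the exported CSV: dimension rows rename the `DimensionInfo` for each language, while metadata rows upsert the en-GB and cy-GB `DatasetInfo` columns. A sketch of the payload it expects; the dataset id, dimension id, and string values are placeholders:

```ts
import { TranslationDTO } from '../dtos/translations-dto';
import { DatasetRepository } from '../repositories/dataset';

// placeholder rows mirroring the exported CSV columns: type, key, id, english, cymraeg
const translations: TranslationDTO[] = [
    { type: 'dimension', key: 'AreaCode', id: 'a-dimension-uuid', english: 'Area', cymraeg: 'Ardal' },
    { type: 'metadata', key: 'title', english: 'School meals', cymraeg: 'Prydau ysgol' }
];

async function applyTranslations(datasetId: string): Promise<void> {
    // both language rows are written for each entry; missing DatasetInfo rows
    // are created, the Welsh one seeded from the English values
    await DatasetRepository.updateTranslations(datasetId, translations);
}
```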
diff --git a/src/route/dataset.ts b/src/route/dataset.ts
index 3d352e9..385e48f 100644
--- a/src/route/dataset.ts
+++ b/src/route/dataset.ts
@@ -177,7 +177,8 @@ router.post('/', jsonParser, async (req: Request, res: Response, next: NextFunction) => {
     }
 
     try {
-        const dataset = await DatasetRepository.createWithTitle(req.user as User, req.language, req.body.title);
+        const language = req.language as Locale;
+        const dataset = await DatasetRepository.createWithTitle(req.user as User, language, req.body.title);
         logger.info(`Dataset created with id: ${dataset.id}`);
         res.status(201);
         res.json(DatasetDTO.fromDataset(dataset));
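With the repository change above, `createWithTitle` now always seeds a `DatasetInfo` row for both locales, leaving the alternate-language row untitled so the tasklist reports it as awaiting translation. A sketch of the resulting state; the `Locale` and `User` import paths are assumptions:

```ts
import { DatasetRepository } from '../repositories/dataset';
import { User } from '../entities/user/user'; // assumed entity path
import { Locale } from '../enums/locale'; // assumed enum path
import { logger } from '../utils/logger';

async function createExample(user: User): Promise<void> {
    const dataset = await DatasetRepository.createWithTitle(user, Locale.EnglishGb, 'Test Dataset');

    // one DatasetInfo row per locale: en-GB carries the title, cy-GB is
    // created empty and filled in later by the translation import
    dataset.datasetInfo?.forEach((info) => logger.info(`${info.language}: ${info.title}`));
}
```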
diff --git a/src/route/translation.ts b/src/route/translation.ts
new file mode 100644
index 0000000..afa1e84
--- /dev/null
+++ b/src/route/translation.ts
@@ -0,0 +1,169 @@
+import { Readable } from 'node:stream';
+
+import { Request, Response, NextFunction, Router } from 'express';
+import { parse, stringify } from 'csv';
+import multer from 'multer';
+
+import { logger } from '../utils/logger';
+import { UnknownException } from '../exceptions/unknown.exception';
+import { BadRequestException } from '../exceptions/bad-request.exception';
+import { Dataset } from '../entities/dataset/dataset';
+import { DatasetDTO } from '../dtos/dataset-dto';
+import { TranslationDTO } from '../dtos/translations-dto';
+import { DatasetRepository } from '../repositories/dataset';
+import { DataLakeService } from '../services/datalake';
+import { translatableMetadataKeys } from '../types/translatable-metadata';
+
+import { loadDataset } from './dataset';
+
+export const translationRouter = Router();
+
+const upload = multer({ storage: multer.memoryStorage() });
+
+// imported translation filename can be constant as we overwrite each time it's imported
+const TRANSLATION_FILENAME = 'translation-import.csv';
+
+const collectTranslations = (dataset: Dataset): TranslationDTO[] => {
+    const metadataEN = dataset.datasetInfo?.find((info) => info.language.includes('en'));
+    const metadataCY = dataset.datasetInfo?.find((info) => info.language.includes('cy'));
+
+    // ignore roundingDescription if rounding isn't applied
+    const metadataKeys = translatableMetadataKeys.filter((key) => {
+        return metadataEN?.roundingApplied === true ? true : key !== 'roundingDescription';
+    });
+
+    const translations: TranslationDTO[] = [
+        ...dataset.dimensions?.map((dim) => ({
+            type: 'dimension',
+            key: dim.factTableColumn,
+            english: dim.dimensionInfo?.find((info) => info.language.includes('en'))?.name,
+            cymraeg: dim.dimensionInfo?.find((info) => info.language.includes('cy'))?.name,
+            id: dim.id
+        })),
+        ...metadataKeys.map((prop) => ({
+            type: 'metadata',
+            key: prop,
+            english: metadataEN?.[prop] as string,
+            cymraeg: metadataCY?.[prop] as string
+        }))
+    ];
+
+    return translations;
+};
+
+const parseUploadedTranslations = async (fileBuffer: Buffer): Promise<TranslationDTO[]> => {
+    const translations: TranslationDTO[] = [];
+
+    const csvParser: AsyncIterable<TranslationDTO> = Readable.from(fileBuffer).pipe(
+        parse({ bom: true, columns: true })
+    );
+
+    for await (const row of csvParser) {
+        translations.push(row);
+    }
+
+    return translations;
+};
+
+translationRouter.get(
+    '/:dataset_id/preview',
+    loadDataset(),
+    async (req: Request, res: Response, next: NextFunction) => {
+        try {
+            logger.info('Previewing translations for export...');
+            const dataset: Dataset = res.locals.dataset;
+            const translations = collectTranslations(dataset);
+            res.json(translations);
+        } catch (error) {
+            logger.error('Error previewing translations', error);
+            next(new UnknownException());
+        }
+    }
+);
+
+translationRouter.get('/:dataset_id/export', loadDataset(), async (req: Request, res: Response, next: NextFunction) => {
+    try {
+        logger.info('Exporting translations to CSV...');
+        const dataset: Dataset = res.locals.dataset;
+        const translations = collectTranslations(dataset);
+        res.setHeader('Content-Type', 'text/csv');
+        stringify(translations, { bom: true, header: true }).pipe(res);
+    } catch (error) {
+        logger.error('Error exporting translations', error);
+        next(new UnknownException());
+    }
+});
+
+translationRouter.post(
+    '/:dataset_id/import',
+    upload.single('csv'),
+    loadDataset(),
+    async (req: Request, res: Response, next: NextFunction) => {
+        const dataset: Dataset = res.locals.dataset;
+        logger.info('Validating imported translations CSV...');
+
+        if (!req.file || !req.file.buffer) {
+            next(new BadRequestException('errors.upload.no_csv'));
+            return;
+        }
+
+        try {
+            // check the csv has all the keys and values required
+            const existingTranslations = collectTranslations(dataset);
+            const newTranslations = await parseUploadedTranslations(req.file.buffer);
+
+            // validate the translation import is what we're expecting
+            if (existingTranslations.length !== newTranslations.length) {
+                next(new BadRequestException('errors.translation_file.invalid.row_count'));
+                return;
+            }
+
+            existingTranslations.forEach((oldTranslation) => {
+                const newTranslation = newTranslations.find(
+                    (t) => oldTranslation.type === t.type && oldTranslation.key === t.key
+                );
+
+                if (!newTranslation) {
+                    throw new BadRequestException('errors.translation_file.invalid.keys');
+                }
+            });
+
+            // store the translation import in the datalake so we can use it once it's confirmed as correct
+            const datalake = new DataLakeService();
+            await datalake.uploadFileBuffer(TRANSLATION_FILENAME, dataset.id, Buffer.from(req.file.buffer));
+
+            res.status(201);
+            res.json(DatasetDTO.fromDataset(dataset));
+        } catch (error) {
+            if (error instanceof BadRequestException) {
+                next(error);
+                return;
+            }
+            logger.error('Error importing translations', error);
+            next(new UnknownException());
+        }
+    }
+);
+
+translationRouter.patch(
+    '/:dataset_id/import',
+    loadDataset(),
+    async (req: Request, res: Response, next: NextFunction) => {
+        let dataset: Dataset = res.locals.dataset;
+        logger.info('Updating translations from CSV...');
+
+        try {
+            const datalake = new DataLakeService();
+            const fileBuffer = await datalake.getFileBuffer(TRANSLATION_FILENAME, dataset.id);
+            const newTranslations = await parseUploadedTranslations(fileBuffer);
+            dataset = await DatasetRepository.updateTranslations(dataset.id, newTranslations);
+            await datalake.deleteFile(TRANSLATION_FILENAME, dataset.id);
+
+            res.status(201);
+            res.json(DatasetDTO.fromDataset(dataset));
+        } catch (error) {
+            logger.error(error, 'Error updating translations');
+            next(new UnknownException());
+        }
+    }
+);
diff --git a/src/types/translatable-metadata.ts b/src/types/translatable-metadata.ts
new file mode 100644
index 0000000..dced70a
--- /dev/null
+++ b/src/types/translatable-metadata.ts
@@ -0,0 +1,11 @@
+import { DatasetInfo } from '../entities/dataset/dataset-info';
+
+export const translatableMetadataKeys: (keyof DatasetInfo)[] = [
+    'title',
+    'description',
+    'collection',
+    'quality',
+    'roundingDescription'
+] as const;
+
+export type TranslatableMetadataKey = (typeof translatableMetadataKeys)[number];
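Import is a two-step flow: POST validates the uploaded CSV against the current export and stages it in the data lake; PATCH replays the staged file into the database and then deletes it. A client-side sketch in the style of the existing supertest suite, assuming the `app`, `dataset`, and `user` fixtures from that suite and a hypothetical CSV fixture:

```ts
import path from 'node:path';
import request from 'supertest';

test('translation import is staged then applied', async () => {
    const csvFile = path.resolve(__dirname, 'sample-files/csv/translations.csv'); // hypothetical fixture

    // step 1: validate row count and keys, then stage the file in the data lake
    const staged = await request(app)
        .post(`/translation/${dataset.id}/import`)
        .attach('csv', csvFile) // field name must match upload.single('csv')
        .set(getAuthHeader(user));
    expect(staged.status).toBe(201);

    // step 2: apply the staged translations and clean up the staged file
    const applied = await request(app).patch(`/translation/${dataset.id}/import`).set(getAuthHeader(user));
    expect(applied.status).toBe(201);
});
```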
diff --git a/test/publisher-journey.test.ts b/test/publisher-journey.test.ts
index e6f17bf..1307e6c 100644
--- a/test/publisher-journey.test.ts
+++ b/test/publisher-journey.test.ts
@@ -75,7 +75,7 @@ describe('API Endpoints', () => {
     });
 
     test('Upload returns 400 if no file attached', async () => {
-        const dataset = await DatasetRepository.createWithTitle(user, 'en-GB', 'Test Dataset 1');
+        const dataset = await DatasetRepository.createWithTitle(user, Locale.EnglishGb, 'Test Dataset 1');
         const res = await request(app).post(`/dataset/${dataset.id}/data`).set(getAuthHeader(user));
         expect(res.status).toBe(400);
         expect(res.body).toEqual({ error: 'No CSV data provided' });
@@ -83,7 +83,7 @@
     });
 
     test('Upload returns 201 if a file is attached', async () => {
-        const dataset = await DatasetRepository.createWithTitle(user, 'en-GB', 'Test Dataset 2');
+        const dataset = await DatasetRepository.createWithTitle(user, Locale.EnglishGb, 'Test Dataset 2');
         const csvFile = path.resolve(__dirname, `sample-files/csv/sure-start-short.csv`);
         const res = await request(app)
             .post(`/dataset/${dataset.id}/data`)
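A possible follow-up test for the new export route, in the style of the suite above; it assumes a freshly created dataset loads with an empty dimensions array, so only metadata rows are emitted:

```ts
test('Translation export returns CSV of translatable strings', async () => {
    const dataset = await DatasetRepository.createWithTitle(user, Locale.EnglishGb, 'Test Dataset 3');
    const res = await request(app).get(`/translation/${dataset.id}/export`).set(getAuthHeader(user));
    expect(res.status).toBe(200);
    expect(res.headers['content-type']).toContain('text/csv');
    // metadata rows are always emitted, so the 'title' key should appear in the body
    expect(res.text).toContain('title');
});
```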