From d5bed435bde3e33a929527bc61c8be4a944dda5a Mon Sep 17 00:00:00 2001 From: Jamie Maynard Date: Wed, 14 Aug 2024 17:44:37 +0100 Subject: [PATCH 1/5] Implement new datamodel on the backend --- src/controllers/blob-storage.ts | 5 +- src/controllers/csv-processor.ts | 85 ++++++++++---------- src/dtos/upload-dto.ts | 2 +- src/entity2/csv_info.ts | 23 ++++++ src/entity2/dataset.ts | 31 ++++++++ src/entity2/dataset_info.ts | 23 ++++++ src/entity2/dimension.ts | 34 ++++++++ src/entity2/dimension_info.ts | 25 ++++++ src/entity2/import.ts | 30 +++++++ src/entity2/revision.ts | 55 +++++++++++++ src/entity2/source.ts | 35 +++++++++ src/entity2/user.ts | 49 ++++++++++++ src/route/dataset-route.ts | 130 ++++++++++++++++++------------- 13 files changed, 422 insertions(+), 105 deletions(-) create mode 100644 src/entity2/csv_info.ts create mode 100644 src/entity2/dataset.ts create mode 100644 src/entity2/dataset_info.ts create mode 100644 src/entity2/dimension.ts create mode 100644 src/entity2/dimension_info.ts create mode 100644 src/entity2/import.ts create mode 100644 src/entity2/revision.ts create mode 100644 src/entity2/source.ts create mode 100644 src/entity2/user.ts diff --git a/src/controllers/blob-storage.ts b/src/controllers/blob-storage.ts index 2779b11..0a044a1 100644 --- a/src/controllers/blob-storage.ts +++ b/src/controllers/blob-storage.ts @@ -1,4 +1,5 @@ import { BlobServiceClient, ContainerClient, StorageSharedKeyCredential } from '@azure/storage-blob'; +import { Readable } from 'stream'; import * as dotenv from 'dotenv'; import pino from 'pino'; @@ -42,7 +43,7 @@ export class BlobStorageService { return this.containerClient; } - public async uploadFile(fileName: string | undefined, fileContent: Buffer) { + public async uploadFile(fileName: string | undefined, fileContent: Readable) { if (fileName === undefined) { throw new Error('File name is undefined'); } @@ -52,7 +53,7 @@ export class BlobStorageService { const blockBlobClient = this.containerClient.getBlockBlobClient(fileName); - const uploadBlobResponse = await blockBlobClient.upload(fileContent, fileContent.length); + const uploadBlobResponse = await blockBlobClient.uploadStream(fileContent, fileContent.readableLength); return uploadBlobResponse; } diff --git a/src/controllers/csv-processor.ts b/src/controllers/csv-processor.ts index f178c3f..fe8b90c 100644 --- a/src/controllers/csv-processor.ts +++ b/src/controllers/csv-processor.ts @@ -1,12 +1,15 @@ /* eslint-disable import/no-cycle */ -import { createHash } from 'crypto'; +import { createHash, randomUUID } from 'crypto'; +import { Readable } from 'stream'; import { parse } from 'csv'; import { UploadDTO, UploadErrDTO } from '../dtos/upload-dto'; import { Error } from '../models/error'; -import { Datafile } from '../entity/datafile'; -import { Dataset } from '../entity/dataset'; +import { DatasetRevision } from '../entity2/revision'; + +import { Import } from 'src/entity2/import'; + import { ViewDTO, ViewErrDTO } from '../dtos/view-dto'; import { datasetToDatasetDTO } from '../dtos/dataset-dto'; import { ENGLISH, WELSH, logger, t } from '../app'; @@ -18,6 +21,21 @@ export const MAX_PAGE_SIZE = 500; export const MIN_PAGE_SIZE = 5; export const DEFAULT_PAGE_SIZE = 100; +function hashReadableStream(stream: Readable, algorithm: string = 'sha256'): Promise<string> { + return new Promise<string>((resolve, reject) => { + const hash = createHash(algorithm); + stream.on('data', (chunk) => { + hash.update(chunk); + }); + stream.on('end', () => { + resolve(hash.digest('hex')); + }); + stream.on('error',
(err) => { + reject(err); + }); + }); +} + function paginate(array: Array, page_number: number, page_size: number): Array { const page = array.slice((page_number - 1) * page_size, page_number * page_size); return page; @@ -161,55 +179,30 @@ export const moveCSVFromBlobStorageToDatalake = async (dataset: Dataset): Promis } }; -export const uploadCSVToBlobStorage = async (buff: Buffer, dataset: Dataset): Promise => { +export const uploadCSVToBlobStorage = async (fileStream: Readable, filetype: string): Promise => { const blobStorageService = new BlobStorageService(); - if (buff) { - const hash = createHash('sha256').update(buff).digest('hex'); - const datafile = Datafile.createDatafile(dataset, hash.toString(), 'BetaUser'); - const savedDataFile = await datafile.save(); - const dto = await datasetToDatasetDTO(dataset); + if (fileStream) { + const importRecord = new Import(); + importRecord.id = randomUUID(); + importRecord.mime_type = filetype; try { - logger.debug(`Uploading file ${savedDataFile.id} to blob storage`); - await blobStorageService.uploadFile(`${savedDataFile.id}.csv`, buff); - return { - success: true, - dataset: dto - }; + await blobStorageService.uploadFile(`${importRecord.id}.csv`, fileStream); + const resolvedHash = await hashReadableStream(fileStream) + .then((hash) => { + return hash.toString(); + }) + .catch((error) => { + throw new Error(`Error hashing stream: ${error}`); + }); + if (resolvedHash) importRecord.file_hash = resolvedHash; + return await importRecord.save(); } catch (err) { logger.error(err); - datafile.remove(); - return { - success: false, - dataset: dto, - errors: [ - { - field: 'csv', - message: [ - { lang: ENGLISH, message: t('errors.upload_to_datalake', { lng: ENGLISH }) }, - { lang: WELSH, message: t('errors.upload_to_datalake', { lng: WELSH }) } - ], - tag: { name: 'errors.upload_to_datalake', params: {} } - } - ] - }; + throw new Error('Error processing file upload to blob storage'); } } else { - logger.debug('No buffer to upload to datalake'); - const dto = await datasetToDatasetDTO(dataset); - return { - success: false, - dataset: dto, - errors: [ - { - field: 'csv', - message: [ - { lang: ENGLISH, message: t('errors.no_csv_data', { lng: ENGLISH }) }, - { lang: WELSH, message: t('errors.no_csv_data', { lng: WELSH }) } - ], - tag: { name: 'errors.no_csv_data', params: {} } - } - ] - }; + logger.error('No buffer to upload to blob storage'); + throw new Error('No buffer to upload to blob storage'); } }; diff --git a/src/dtos/upload-dto.ts b/src/dtos/upload-dto.ts index 52b6427..f3de05a 100644 --- a/src/dtos/upload-dto.ts +++ b/src/dtos/upload-dto.ts @@ -10,6 +10,6 @@ export interface UploadDTO { export interface UploadErrDTO { success: boolean; - dataset: DatasetDTO; + dataset: DatasetDTO | undefined; errors: Error[]; } diff --git a/src/entity2/csv_info.ts b/src/entity2/csv_info.ts new file mode 100644 index 0000000..73b4638 --- /dev/null +++ b/src/entity2/csv_info.ts @@ -0,0 +1,23 @@ +import { Entity, PrimaryGeneratedColumn, Column, BaseEntity, OneToOne, JoinColumn } from 'typeorm'; + +// eslint-disable-next-line import/no-cycle +import { Import } from './import'; + +@Entity() +export class CsvInfo extends BaseEntity { + @PrimaryGeneratedColumn('uuid') + import_id: string; + + @Column({ type: 'char', length: 1 }) + delimiter: string; + + @Column({ type: 'char', length: 1 }) + quote: string; + + @Column({ type: 'varchar', length: 2 }) + linebreak: string; + + @OneToOne(() => Import, (importEntity) => importEntity.csvInfo, { onDelete: 'CASCADE' }) + 
@JoinColumn({ name: 'import_id' }) + import: Import; +} diff --git a/src/entity2/dataset.ts b/src/entity2/dataset.ts new file mode 100644 index 0000000..afe959a --- /dev/null +++ b/src/entity2/dataset.ts @@ -0,0 +1,31 @@ +import { Entity, PrimaryGeneratedColumn, Column, BaseEntity, ManyToOne, OneToMany, JoinColumn } from 'typeorm'; + +import { User } from './user'; +import { RevisionEntity } from './revision'; +// eslint-disable-next-line import/no-cycle +import { DatasetInfo } from './dataset_info'; + +@Entity() +export class Dataset extends BaseEntity { + @PrimaryGeneratedColumn('uuid') + id: string; + + @Column({ type: 'timestamp', default: () => 'CURRENT_TIMESTAMP' }) + creation_date: Date; + + @ManyToOne(() => User) + @JoinColumn({ name: 'created_by' }) + created_by: User; + + @Column({ type: 'timestamp', nullable: true }) + live: Date; + + @Column({ type: 'timestamp', nullable: true }) + archive: Date; + + @OneToMany(() => RevisionEntity, (revision) => revision.dataset) + revisions: RevisionEntity[]; + + @OneToMany(() => DatasetInfo, (datasetInfo) => datasetInfo.dataset) + datasetInfos: DatasetInfo[]; +} diff --git a/src/entity2/dataset_info.ts b/src/entity2/dataset_info.ts new file mode 100644 index 0000000..60ab4d2 --- /dev/null +++ b/src/entity2/dataset_info.ts @@ -0,0 +1,23 @@ +import { Entity, PrimaryGeneratedColumn, Column, BaseEntity, ManyToOne, JoinColumn } from 'typeorm'; + +// eslint-disable-next-line import/no-cycle +import { Dataset } from './dataset'; + +@Entity() +export class DatasetInfo extends BaseEntity { + @PrimaryGeneratedColumn('uuid') + id: string; + + @Column({ type: 'varchar', length: 5, nullable: true }) + language: string; + + @Column({ type: 'text', nullable: true }) + title: string; + + @Column({ type: 'text', nullable: true }) + description: string; + + @ManyToOne(() => Dataset, (dataset) => dataset.datasetInfos) + @JoinColumn({ name: 'dataset_id' }) + dataset: Dataset; +} diff --git a/src/entity2/dimension.ts b/src/entity2/dimension.ts new file mode 100644 index 0000000..a1db7c5 --- /dev/null +++ b/src/entity2/dimension.ts @@ -0,0 +1,34 @@ +import { Entity, PrimaryGeneratedColumn, Column, BaseEntity, ManyToOne, OneToMany, JoinColumn } from 'typeorm'; + +import { Dataset } from './dataset'; +import { RevisionEntity } from './revision'; +// eslint-disable-next-line import/no-cycle +import { DimensionInfo } from './dimension_info'; + +@Entity() +export class Dimension extends BaseEntity { + @PrimaryGeneratedColumn('uuid') + id: string; + + @ManyToOne(() => Dataset) + @JoinColumn({ name: 'dataset_id' }) + dataset: Dataset; + + // Replace with actual enum types + @Column({ type: 'enum', enum: ['type1', 'type2'], nullable: false }) + type: string; + + @ManyToOne(() => RevisionEntity) + @JoinColumn({ name: 'start_revision_id' }) + start_revision: RevisionEntity; + + @ManyToOne(() => RevisionEntity, { nullable: true }) + @JoinColumn({ name: 'finish_revision_id' }) + finish_revision: RevisionEntity; + + @Column({ type: 'text', nullable: true }) + validator: string; + + @OneToMany(() => DimensionInfo, (dimensionInfo) => dimensionInfo.dimension) + dimensionInfos: DimensionInfo[]; +} diff --git a/src/entity2/dimension_info.ts b/src/entity2/dimension_info.ts new file mode 100644 index 0000000..fb973a2 --- /dev/null +++ b/src/entity2/dimension_info.ts @@ -0,0 +1,25 @@ +import { Entity, PrimaryGeneratedColumn, Column, BaseEntity, ManyToOne, JoinColumn } from 'typeorm'; + +import { Dimension } from './dimension'; + +@Entity() +export class DimensionInfo extends 
BaseEntity { + @PrimaryGeneratedColumn('uuid') + id: string; + + @Column({ type: 'varchar', length: 5, nullable: true }) + language: string; + + @Column({ type: 'text' }) + name: string; + + @Column({ type: 'text', nullable: true }) + description: string; + + @Column({ type: 'text', nullable: true }) + notes: string; + + @ManyToOne(() => Dimension, (dimension) => dimension.dimensionInfos) + @JoinColumn({ name: 'dimension_id' }) + dimension: Dimension; +} diff --git a/src/entity2/import.ts b/src/entity2/import.ts new file mode 100644 index 0000000..0b3eb29 --- /dev/null +++ b/src/entity2/import.ts @@ -0,0 +1,30 @@ +import { Entity, PrimaryGeneratedColumn, Column, BaseEntity, ManyToOne, OneToOne, JoinColumn } from 'typeorm'; + +import { RevisionEntity } from './revision'; +// eslint-disable-next-line import/no-cycle +import { CsvInfo } from './csv_info'; +// eslint-disable-next-line import/no-cycle +import { Source } from './source'; + +@Entity() +export class Import extends BaseEntity { + @PrimaryGeneratedColumn('uuid') + id: string; + + @ManyToOne(() => RevisionEntity) + @JoinColumn({ name: 'revision_id' }) + revision: RevisionEntity; + + @OneToOne(() => CsvInfo, (csvInfo) => csvInfo.import, { onDelete: 'CASCADE' }) + @JoinColumn({ name: 'csv_info' }) + csvInfo: CsvInfo; + + @Column({ type: 'varchar', length: 255 }) + mime_type: string; + + @Column({ type: 'varchar', length: 255 }) + filename: string; + + @OneToOne(() => Source, (source) => source.import) + source: Source; +} diff --git a/src/entity2/revision.ts b/src/entity2/revision.ts new file mode 100644 index 0000000..f6a78c5 --- /dev/null +++ b/src/entity2/revision.ts @@ -0,0 +1,55 @@ +import { Entity, PrimaryGeneratedColumn, Column, BaseEntity, JoinColumn, ManyToOne } from 'typeorm'; + +// eslint-disable-next-line import/no-cycle +import { Dataset } from './dataset'; +import { User } from './user'; + +interface Revision { + id: string; + revision_index: number; + dataset: Dataset; + creation_date: Date; + previous_revision: Revision; + online_cube_filename: string; + publish_date: Date; + approval_date: Date; + approved_by: User; + created_by: User; +} + +@Entity() +export class RevisionEntity extends BaseEntity implements Revision { + @PrimaryGeneratedColumn('uuid') + id: string; + + @Column({ type: 'int' }) + revision_index: number; + + @ManyToOne(() => Dataset, (dataset) => dataset.revisions) + @JoinColumn({ name: 'dataset_id' }) + dataset: Dataset; + + @Column({ type: 'timestamp', default: () => 'CURRENT_TIMESTAMP' }) + creation_date: Date; + + @ManyToOne(() => RevisionEntity, { nullable: true }) + @JoinColumn({ name: 'previous_revision_id' }) + previous_revision: Revision; + + @Column({ type: 'varchar', length: 255, nullable: true }) + online_cube_filename: string; + + @Column({ type: 'timestamp', nullable: true }) + publish_date: Date; + + @Column({ type: 'timestamp', nullable: true }) + approval_date: Date; + + @ManyToOne(() => User, { nullable: true }) + @JoinColumn({ name: 'approved_by' }) + approved_by: User; + + @ManyToOne(() => User) + @JoinColumn({ name: 'created_by' }) + created_by: User; +} diff --git a/src/entity2/source.ts b/src/entity2/source.ts new file mode 100644 index 0000000..7337b0a --- /dev/null +++ b/src/entity2/source.ts @@ -0,0 +1,35 @@ +import { Entity, PrimaryGeneratedColumn, Column, BaseEntity, ManyToOne, OneToOne, JoinColumn } from 'typeorm'; + +import { Dimension } from './dimension'; +// eslint-disable-next-line import/no-cycle +import { Import } from './import'; +import { RevisionEntity } from 
'./revision'; + +@Entity() +export class Source extends BaseEntity { + @PrimaryGeneratedColumn('uuid') + id: string; + + @ManyToOne(() => Dimension) + @JoinColumn({ name: 'dimension_id' }) + dimension: Dimension; + + @OneToOne(() => Import, (importEntity) => importEntity.source, { nullable: false }) + @JoinColumn({ name: 'import_id' }) + import: Import; + + @ManyToOne(() => RevisionEntity) + @JoinColumn({ name: 'revision_id' }) + revision: RevisionEntity; + + @ManyToOne(() => RevisionEntity) + @JoinColumn({ name: 'lookup_table_revision_id' }) + lookupTableRevision: RevisionEntity; + + @Column({ type: 'text' }) + csv_field: string; + + // Replace with actual enum types + @Column({ type: 'enum', enum: ['action1', 'action2'], nullable: false }) + action: string; +} diff --git a/src/entity2/user.ts b/src/entity2/user.ts new file mode 100644 index 0000000..3b5a634 --- /dev/null +++ b/src/entity2/user.ts @@ -0,0 +1,49 @@ +import { Entity, PrimaryGeneratedColumn, Column, BaseEntity } from 'typeorm'; + +@Entity() +export class User extends BaseEntity { + @PrimaryGeneratedColumn('uuid') + id: string; + + @Column({ unique: true }) + username: string; + + @Column({ unique: true }) + email: string; + + @Column({ nullable: true, unique: true }) + oidc_subject: string; + + @Column({ nullable: true }) + oidc_issuer: string; + + @Column({ type: 'text', nullable: true }) + access_token: string; + + @Column({ type: 'text', nullable: true }) + refresh_token: string; + + @Column({ type: 'text', nullable: true }) + id_token: string; + + @Column({ type: 'timestamp', nullable: true }) + token_expiry: Date; + + @Column({ nullable: true }) + first_name: string; + + @Column({ nullable: true }) + last_name: string; + + @Column({ nullable: true }) + profile_picture: string; + + @Column({ type: 'timestamp', default: () => 'CURRENT_TIMESTAMP' }) + created_at: Date; + + @Column({ type: 'timestamp', default: () => 'CURRENT_TIMESTAMP', onUpdate: 'CURRENT_TIMESTAMP' }) + updated_at: Date; + + @Column({ type: 'boolean', default: true }) + active: boolean; +} diff --git a/src/route/dataset-route.ts b/src/route/dataset-route.ts index 65e73d2..67973f5 100644 --- a/src/route/dataset-route.ts +++ b/src/route/dataset-route.ts @@ -1,6 +1,8 @@ +/* eslint-disable no-warning-comments */ /* eslint-disable import/no-cycle */ import { Request, Response, Router } from 'express'; import multer from 'multer'; +import pino from 'pino'; import { ViewErrDTO } from '../dtos/view-dto'; import { ENGLISH, WELSH, t } from '../app'; @@ -11,11 +13,18 @@ import { DEFAULT_PAGE_SIZE } from '../controllers/csv-processor'; import { DataLakeService } from '../controllers/datalake'; -import { Dataset } from '../entity/dataset'; -import { Datafile } from '../entity/datafile'; +import { Dataset } from '../entity2/dataset'; +import { DatasetInfo } from '../entity2/dataset_info'; +import { DatasetRevision } from '../entity2/revision'; +import { Import } from '../entity2/import'; import { FileDescription } from '../models/filelist'; import { datasetToDatasetDTO } from '../dtos/dataset-dto'; +export const logger = pino({ + name: 'StatsWales-Alpha-App: DatasetRoute', + level: 'debug' +}); + const storage = multer.memoryStorage(); const upload = multer({ storage }); export const apiRoute = Router(); @@ -39,72 +48,81 @@ function checkDatasetID(datasetID: string, res: Response): boolean { return true; } -apiRoute.post('/', upload.single('csv'), async (req: Request, res: Response) => { - if (!req.file) { - const err: ViewErrDTO = { - success: false, - dataset_id: 
undefined, - errors: [ - { - field: 'csv', - message: [ - { - lang: ENGLISH, - message: t('errors.no_csv_data', { lng: ENGLISH }) - }, - { - lang: WELSH, - message: t('errors.no_csv_data', { lng: WELSH }) - } - ], - tag: { - name: 'errors.no_csv_data', - params: {} +function errorDtoGenerator( + field: string, + translationString: string, + datasetID: string | undefined = undefined +): ViewErrDTO { + return { + success: false, + dataset_id: datasetID, + errors: [ + { + field, + message: [ + { + lang: ENGLISH, + message: t(translationString, { lng: ENGLISH }) + }, + { + lang: WELSH, + message: t(translationString, { lng: WELSH }) } + ], + tag: { + name: translationString, + params: {} } - ] - }; + } + ] + }; +} + + apiRoute.post('/', upload.single('csv'), async (req: Request, res: Response) => { if (!req.file) { res.status(400); - res.json(err); + res.json(errorDtoGenerator('csv', 'errors.no_csv_data')); return; } const lang: string = req.body?.language || req.i18n.language; const title: string = req.body?.title; if (!title) { - const err: ViewErrDTO = { - success: false, - dataset_id: undefined, - errors: [ - { - field: 'title', - message: [ - { - lang: ENGLISH, - message: t('errors.no_title', { lng: ENGLISH }) - }, - { - lang: WELSH, - message: t('errors.no_title', { lng: WELSH }) - } - ], - tag: { - name: 'errors.no_title', - params: {} - } - } - ] - }; res.status(400); - res.json(err); + res.json(errorDtoGenerator('title', 'errors.no_title')); return; } - const dataset = Dataset.createDataset(title, 'BetaUser'); - const saved_dataset_record = await dataset.save(); - saved_dataset_record.addTitleByString(title, lang); - const uploadDTO = await uploadCSVToBlobStorage(req.file?.buffer, saved_dataset_record); - if (!uploadDTO.success) { - res.status(400); + let importRecord: Import; + try { + importRecord = await uploadCSVToBlobStorage(req.file?.stream, req.file?.mimetype); + } catch (err) { + logger.error(`An error occurred trying to upload the file with the following error: ${err}`); + res.status(500); + res.json({ message: 'Error uploading file' }); + return; } + + // Everything looks good so far, let's create the dataset and revision records + const dataset = new Dataset(); + dataset.creation_date = new Date(); + // TODO change how we handle authentication to get the user on the Backend + dataset.created_by = 'Test User'; + const saved_dataset_record = await dataset.save(); + const datasetInfo = new DatasetInfo(); + datasetInfo.language = lang; + datasetInfo.title = title; + datasetInfo.dataset = saved_dataset_record; + await datasetInfo.save(); + const revision = new DatasetRevision(); + revision.dataset = saved_dataset_record; + revision.revision_index = 1; + revision.creation_date = new Date(); + // TODO change how we handle authentication to get the user on the Backend + revision.created_by = 'Test User'; + const saved_revision_record = await revision.save(); + importRecord.revision = saved_revision_record; + await importRecord.save(); + res.json(saved_dataset_record); }); From e091c98b885e36a906cfe215034315ae313e9636 Mon Sep 17 00:00:00 2001 From: Jamie Maynard Date: Wed, 21 Aug 2024 17:06:27 +0100 Subject: [PATCH 2/5] Improvements to datamodel and working code --- .eslintrc | 3 +- src/controllers/blob-storage.ts | 26 +- src/controllers/csv-processor.ts | 288 ++++--------- src/dtos/datafile-dto.ts | 8 - src/dtos/dataset-dto.ts | 85 ---- src/dtos2/dataset-dto.ts | 388 ++++++++++++++++++ src/{models => dtos2}/error.ts | 0 src/dtos2/filelist.ts | 13 + src/{models => dtos2}/processedcsv.ts | 3 +- src/{dtos
=> dtos2}/upload-dto.ts | 4 +- src/{dtos => dtos2}/view-dto.ts | 7 +- src/entity2/dataset.ts | 6 + src/entity2/dataset_info.ts | 5 +- src/entity2/dimension.ts | 6 + src/entity2/dimension_info.ts | 6 +- src/entity2/import.ts | 28 +- src/entity2/revision.ts | 8 +- src/entity2/source.ts | 12 +- src/entity2/user.ts | 28 +- src/migration/1713284688846-dataset.ts | 29 -- src/migration/1716456713793-datafiles.ts | 26 -- src/migration/1716456733681-dataset_title.ts | 21 - .../1716456738219-dataset_description.ts | 21 - src/migration/1716542997594-dataset_column.ts | 22 - src/migration/1716542997596-lookup_table.ts | 27 -- src/migration/1716543109797-column_title.ts | 21 - src/migration/1723729297617-migration.ts | 128 ++++++ src/models/filelist.ts | 8 - src/route/dataset-route.ts | 284 ++++++++----- test/dataset.test.ts | 4 +- 30 files changed, 896 insertions(+), 619 deletions(-) delete mode 100644 src/dtos/datafile-dto.ts delete mode 100644 src/dtos/dataset-dto.ts create mode 100644 src/dtos2/dataset-dto.ts rename src/{models => dtos2}/error.ts (100%) create mode 100644 src/dtos2/filelist.ts rename src/{models => dtos2}/processedcsv.ts (92%) rename src/{dtos => dtos2}/upload-dto.ts (74%) rename src/{dtos => dtos2}/view-dto.ts (81%) delete mode 100644 src/migration/1713284688846-dataset.ts delete mode 100644 src/migration/1716456713793-datafiles.ts delete mode 100644 src/migration/1716456733681-dataset_title.ts delete mode 100644 src/migration/1716456738219-dataset_description.ts delete mode 100644 src/migration/1716542997594-dataset_column.ts delete mode 100644 src/migration/1716542997596-lookup_table.ts delete mode 100644 src/migration/1716543109797-column_title.ts create mode 100644 src/migration/1723729297617-migration.ts delete mode 100644 src/models/filelist.ts diff --git a/.eslintrc b/.eslintrc index 6874c4a..9ec23c3 100644 --- a/.eslintrc +++ b/.eslintrc @@ -21,7 +21,8 @@ "rules": { "prettier/prettier": 2, "no-console": 0, - "no-process-env": 0 + "no-process-env": 0, + "no-inline-comments": 1 }, "globals": { "NodeJS": true diff --git a/src/controllers/blob-storage.ts b/src/controllers/blob-storage.ts index 0a044a1..1cd9943 100644 --- a/src/controllers/blob-storage.ts +++ b/src/controllers/blob-storage.ts @@ -1,5 +1,11 @@ -import { BlobServiceClient, ContainerClient, StorageSharedKeyCredential } from '@azure/storage-blob'; import { Readable } from 'stream'; + +import { + BlobServiceClient, + BlobUploadCommonResponse, + ContainerClient, + StorageSharedKeyCredential +} from '@azure/storage-blob'; import * as dotenv from 'dotenv'; import pino from 'pino'; @@ -53,7 +59,10 @@ export class BlobStorageService { const blockBlobClient = this.containerClient.getBlockBlobClient(fileName); - const uploadBlobResponse = await blockBlobClient.uploadStream(fileContent, fileContent.readableLength); + const uploadBlobResponse: BlobUploadCommonResponse = await blockBlobClient.uploadStream( + fileContent, + fileContent.readableLength + ); return uploadBlobResponse; } @@ -85,7 +94,18 @@ export class BlobStorageService { if (chunk instanceof Buffer) chunks.push(chunk); else chunks.push(Buffer.from(chunk)); } - return Buffer.concat(chunks).toString('utf-8'); + return Buffer.concat(chunks); + } + + public async getReadableStream(fileName: string) { + const blockBlobClient = this.containerClient.getBlockBlobClient(fileName); + const downloadBlockBlobResponse = await blockBlobClient.download(); + const readableStreamBody: ReadableStream | undefined = downloadBlockBlobResponse.readableStreamBody; + + if 
(!readableStreamBody) { + throw new Error('Failed to get readable stream body from download response.'); + } + return readableStreamBody; } public async readFileToBuffer(fileName: string) { diff --git a/src/controllers/csv-processor.ts b/src/controllers/csv-processor.ts index fe8b90c..0dbe380 100644 --- a/src/controllers/csv-processor.ts +++ b/src/controllers/csv-processor.ts @@ -4,18 +4,15 @@ import { Readable } from 'stream'; import { parse } from 'csv'; -import { UploadDTO, UploadErrDTO } from '../dtos/upload-dto'; -import { Error } from '../models/error'; -import { DatasetRevision } from '../entity2/revision'; - -import { Import } from 'src/entity2/import'; - -import { ViewDTO, ViewErrDTO } from '../dtos/view-dto'; -import { datasetToDatasetDTO } from '../dtos/dataset-dto'; import { ENGLISH, WELSH, logger, t } from '../app'; +import { DatasetDTO, ImportDTO } from '../dtos2/dataset-dto'; +import { Error } from '../dtos2/error'; +import { ViewDTO, ViewErrDTO } from '../dtos2/view-dto'; +import { Dataset } from '../entity2/dataset'; +import { Import } from '../entity2/import'; -import { DataLakeService } from './datalake'; import { BlobStorageService } from './blob-storage'; +import { DataLakeService } from './datalake'; export const MAX_PAGE_SIZE = 500; export const MIN_PAGE_SIZE = 5; @@ -120,82 +117,32 @@ function validateParams(page_number: number, max_page_number: number, page_size: return errors; } -export const moveCSVFromBlobStorageToDatalake = async (dataset: Dataset): Promise => { - const blobStorageService = new BlobStorageService(); - const dataLakeService = new DataLakeService(); - const datafiles = await dataset.datafiles; - const datafile: Datafile | undefined = datafiles - .filter((filterfile: Datafile) => filterfile.draft === false) - .sort( - (first: Datafile, second: Datafile) => - new Date(second.creationDate).getTime() - new Date(first.creationDate).getTime() - ) - .shift(); - const dto = await datasetToDatasetDTO(dataset); - if (datafile) { - try { - logger.debug(`Moving file ${datafile.id} from blob storage to datalake`); - const buff: Buffer = await blobStorageService.readFileToBuffer(`${datafile.id}.csv`); - await dataLakeService.uploadFile(`${datafile.id}.csv`, buff); - datafile.draft = false; - await datafile.save(); - await blobStorageService.deleteFile(`${datafile.id}.csv`); - return { - success: true, - dataset: dto - }; - } catch (err) { - logger.error(err); - return { - success: false, - dataset: dto, - errors: [ - { - field: 'csv', - message: [ - { lang: ENGLISH, message: t('errors.move_to_datalake', { lng: ENGLISH }) }, - { lang: WELSH, message: t('errors.move_to_datalake', { lng: WELSH }) } - ], - tag: { name: 'errors.move_to_datalake', params: {} } - } - ] - }; - } - } else { - return { - success: false, - dataset: dto, - errors: [ - { - field: 'csv', - message: [ - { lang: ENGLISH, message: t('errors.no_csv', { lng: ENGLISH }) }, - { lang: WELSH, message: t('errors.no_csv', { lng: WELSH }) } - ], - tag: { name: 'errors.no_csv', params: {} } - } - ] - }; - } -}; - export const uploadCSVToBlobStorage = async (fileStream: Readable, filetype: string): Promise => { const blobStorageService = new BlobStorageService(); if (fileStream) { const importRecord = new Import(); importRecord.id = randomUUID(); importRecord.mime_type = filetype; + if (filetype === 'text/csv') { + importRecord.filename = `${importRecord.id}.csv`; + } else { + importRecord.filename = `${importRecord.id}.zip`; + } try { - await blobStorageService.uploadFile(`${importRecord.id}.csv`, 
fileStream); - const resolvedHash = await hashReadableStream(fileStream) + const promisedHash = hashReadableStream(fileStream) .then((hash) => { return hash.toString(); }) .catch((error) => { throw new Error(`Error hashing stream: ${error}`); }); - if (resolvedHash) importRecord.file_hash = resolvedHash; - return await importRecord.save(); + await blobStorageService.uploadFile(`${importRecord.id}.csv`, fileStream); + const resolvedHash = await promisedHash; + if (resolvedHash) importRecord.hash = resolvedHash; + importRecord.uploaded_at = new Date(Date.now()); + importRecord.type = 'Draft'; + importRecord.location = 'BlobStorage'; + return importRecord; } catch (err) { logger.error(err); throw new Error('Error processing file upload to blob storage'); @@ -216,80 +163,67 @@ function setupPagination(page: number, total_pages: number): Array { + const dataArray: Array> = (await parse(buffer, { + delimiter: ',' + }).toArray()) as string[][]; + const csvheaders = dataArray.shift(); + const total_pages = Math.ceil(dataArray.length / size); + const errors = validateParams(page, total_pages, size); + if (errors.length > 0) { + return { + success: false, + errors, + dataset_id: dataset.id + }; + } + + const csvdata = paginate(dataArray, page, size); + const pages = setupPagination(page, total_pages); + const end_record = () => { + if (size > dataArray.length) { + return dataArray.length; + } else if (page === total_pages) { + return dataArray.length; + } else { + return page * size; + } + }; + + return { + success: true, + dataset: DatasetDTO.fromDatasetShallow(dataset), + import: ImportDTO.fromImport(importObj), + current_page: page, + page_info: { + total_records: dataArray.length, + start_record: (page - 1) * size + 1, + end_record: end_record() + }, + pages, + page_size: size, + total_pages, + headers: csvheaders, + data: csvdata + }; +} + export const processCSVFromDatalake = async ( dataset: Dataset, + importObj: Import, page: number, size: number ): Promise => { const datalakeService = new DataLakeService(); + let buff: Buffer; try { - const datafiles = await dataset.datafiles; - const datafile: Datafile | undefined = datafiles - .filter((filterfile: Datafile) => filterfile.draft === false) - .sort( - (first: Datafile, second: Datafile) => - new Date(second.creationDate).getTime() - new Date(first.creationDate).getTime() - ) - .shift(); - if (datafile === undefined || datafile === null) { - return { - success: false, - errors: [ - { - field: 'dataset', - message: [ - { lang: ENGLISH, message: t('errors.no_datafile', { lng: ENGLISH }) }, - { lang: WELSH, message: t('errors.no_datafile', { lng: WELSH }) } - ], - tag: { name: 'erorors.no_datafile', params: {} } - } - ], - dataset_id: dataset.id - }; - } - const buff = await datalakeService.downloadFile(`${datafile.id}.csv`); - - const dataArray: Array> = (await parse(buff, { - delimiter: ',' - }).toArray()) as string[][]; - const csvheaders = dataArray.shift(); - const total_pages = Math.ceil(dataArray.length / size); - const errors = validateParams(page, total_pages, size); - if (errors.length > 0) { - return { - success: false, - errors, - dataset_id: dataset.id - }; - } - - const csvdata = paginate(dataArray, page, size); - const pages = setupPagination(page, total_pages); - const end_record = () => { - if (size > dataArray.length) { - return dataArray.length; - } else if (page === total_pages) { - return dataArray.length; - } else { - return page * size; - } - }; - const dto = await datasetToDatasetDTO(dataset); - return { - success: true, 
- dataset: dto, - current_page: page, - page_info: { - total_records: dataArray.length, - start_record: (page - 1) * size + 1, - end_record: end_record() - }, - pages, - page_size: size, - total_pages, - headers: csvheaders, - data: csvdata - }; + buff = await datalakeService.downloadFile(importObj.filename); } catch (err) { logger.error(err); return { @@ -307,82 +241,19 @@ export const processCSVFromDatalake = async ( dataset_id: dataset.id }; } + return processCSVData(buff, page, size, dataset, importObj); }; export const processCSVFromBlobStorage = async ( dataset: Dataset, + importObj: Import, page: number, size: number ): Promise => { const blobStoageService = new BlobStorageService(); + let buff: Buffer; try { - const datafiles = await dataset.datafiles; - const datafile: Datafile | undefined = datafiles - .filter((filterfile: Datafile) => filterfile.draft === true) - .sort( - (first: Datafile, second: Datafile) => - new Date(second.creationDate).getTime() - new Date(first.creationDate).getTime() - ) - .shift(); - if (datafile === undefined || datafile === null) { - return { - success: false, - errors: [ - { - field: 'dataset', - message: [ - { lang: ENGLISH, message: t('errors.no_datafile', { lng: ENGLISH }) }, - { lang: WELSH, message: t('errors.no_datafile', { lng: WELSH }) } - ], - tag: { name: 'erorors.no_datafile', params: {} } - } - ], - dataset_id: dataset.id - }; - } - const buff = await blobStoageService.readFile(`${datafile.id}.csv`); - - const dataArray: Array> = (await parse(buff, { - delimiter: ',' - }).toArray()) as string[][]; - const csvheaders = dataArray.shift(); - const total_pages = Math.ceil(dataArray.length / size); - const errors = validateParams(page, total_pages, size); - if (errors.length > 0) { - return { - success: false, - errors, - dataset_id: dataset.id - }; - } - - const csvdata = paginate(dataArray, page, size); - const pages = setupPagination(page, total_pages); - const end_record = () => { - if (size > dataArray.length) { - return dataArray.length; - } else if (page === total_pages) { - return dataArray.length; - } else { - return page * size; - } - }; - const dto = await datasetToDatasetDTO(dataset); - return { - success: true, - dataset: dto, - current_page: page, - page_info: { - total_records: dataArray.length, - start_record: (page - 1) * size + 1, - end_record: end_record() - }, - pages, - page_size: size, - total_pages, - headers: csvheaders, - data: csvdata - }; + buff = await blobStoageService.readFile(importObj.filename); } catch (err) { logger.error(err); return { @@ -391,8 +262,8 @@ export const processCSVFromBlobStorage = async ( { field: 'csv', message: [ - { lang: ENGLISH, message: t('errors.download_from_datalake', { lng: ENGLISH }) }, - { lang: WELSH, message: t('errors.download_from_datalake', { lng: WELSH }) } + { lang: ENGLISH, message: t('errors.download_from_blobstorage', { lng: ENGLISH }) }, + { lang: WELSH, message: t('errors.download_from_blobstorage', { lng: WELSH }) } ], tag: { name: 'errors.download_from_datalake', params: {} } } @@ -400,4 +271,5 @@ export const processCSVFromBlobStorage = async ( dataset_id: dataset.id }; } + return processCSVData(buff, page, size, dataset, importObj); }; diff --git a/src/dtos/datafile-dto.ts b/src/dtos/datafile-dto.ts deleted file mode 100644 index b2c3b38..0000000 --- a/src/dtos/datafile-dto.ts +++ /dev/null @@ -1,8 +0,0 @@ -export interface DatafileDTO { - id: string; - sha256hash: string; - created_by: string; - draft: boolean; - creation_date: Date; - csv_link: string; -} diff 
--git a/src/dtos/dataset-dto.ts b/src/dtos/dataset-dto.ts deleted file mode 100644 index 6e37126..0000000 --- a/src/dtos/dataset-dto.ts +++ /dev/null @@ -1,85 +0,0 @@ -/* eslint-disable import/no-cycle */ -import { Dataset } from '../entity/dataset'; - -export interface DatafileDTO { - id: string; - sha256hash: string; - created_by: string; - creation_date: string; -} - -export interface DatasetDescriptionDTO { - description: string; - language: string; -} - -export interface DatasetTitleDTO { - title: string; - language: string; -} - -export interface DatasetDTO { - id: string; - code: string; - internal_name: string; - title: DatasetTitleDTO[]; - description: DatasetDescriptionDTO[]; - creation_date: string; - created_by: string; - modification_date: string; - modified_by: string; - live: boolean; - datafiles: DatafileDTO[]; - csv_link: string; - xslx_link: string; - view_link: string; -} - -export async function datasetToDatasetDTO(dataset: Dataset): Promise { - const datasetTitleDtos: DatasetTitleDTO[] = []; - const datasetDescriptionDtos: DatasetDescriptionDTO[] = []; - const datafilesDtos: DatafileDTO[] = []; - - const titles = await dataset.title; - for (const title of titles) { - datasetTitleDtos.push({ - title: title.title, - language: title.languageCode - }); - } - - const descriptions = await dataset.description; - for (const desc of descriptions) { - datasetDescriptionDtos.push({ - description: desc.description, - language: desc.languageCode - }); - } - - const datafiles = await dataset.datafiles; - for (const dfile of datafiles) { - datafilesDtos.push({ - id: dfile.id, - sha256hash: dfile.sha256hash, - created_by: dfile.createdBy, - creation_date: dfile.creationDate.toISOString() - }); - } - - return { - id: dataset.id, - code: dataset.code, - internal_name: dataset.internalName, - title: datasetTitleDtos, - description: datasetDescriptionDtos, - creation_date: dataset.creationDate.toString(), - created_by: dataset.createdBy, - modification_date: dataset.lastModified.toString(), - modified_by: dataset.modifiedBy, - live: dataset.live, - datafiles: datafilesDtos, - csv_link: `/dataset/${dataset.id}/csv`, - xslx_link: `/dataset/${dataset.id}/xlsx`, - view_link: `/dataset/${dataset.id}/view` - }; -} diff --git a/src/dtos2/dataset-dto.ts b/src/dtos2/dataset-dto.ts new file mode 100644 index 0000000..078d144 --- /dev/null +++ b/src/dtos2/dataset-dto.ts @@ -0,0 +1,388 @@ +import { Dataset } from '../entity2/dataset'; +import { Dimension } from '../entity2/dimension'; +import { DimensionInfo } from '../entity2/dimension_info'; +import { Source } from '../entity2/source'; +import { Import } from '../entity2/import'; +import { RevisionEntity } from '../entity2/revision'; +import { DatasetInfo } from '../entity2/dataset_info'; + +export class DatasetInfoDTO { + language?: string; + title?: string; + description?: string; +} + +export class DimensionInfoDTO { + language?: string; + name: string; + description?: string; + notes?: string; +} + +export class SourceDTO { + id: string; + import_id: string; + revision_id: string; + // Commented out as we don't have lookup tables yet + // lookup_table_revision_id?: string; + csv_field: string; + action: string; +} + +export class DimensionDTO { + id: string; + type: string; + start_revision_id: string; + finish_revision_id?: string; + validator?: string; + sources?: SourceDTO[]; + dimensionInfos?: DimensionInfoDTO[]; + dataset_id?: string; + + static fromDimension(dimension: Dimension): DimensionDTO { + const dto = new DimensionDTO(); + dto.id 
= dimension.id; + dto.type = dimension.type; + dto.start_revision_id = dimension.start_revision.id; + dto.finish_revision_id = dimension.finish_revision ? dimension.finish_revision.id : undefined; + dto.validator = dimension.validator; + dto.dimensionInfos = dimension.dimensionInfos.map((dimensionInfo: DimensionInfo) => { + const infoDto = new DimensionInfoDTO(); + infoDto.language = dimensionInfo.language; + infoDto.name = dimensionInfo.name; + infoDto.description = dimensionInfo.description; + infoDto.notes = dimensionInfo.notes; + return infoDto; + }); + dto.sources = dimension.sources.map((source: Source) => { + const sourceDto = new SourceDTO(); + sourceDto.id = source.id; + sourceDto.import_id = source.import.id; + sourceDto.revision_id = source.revision.id; + sourceDto.csv_field = source.csv_field; + sourceDto.action = source.action; + return sourceDto; + }); + dto.dataset_id = dimension.dataset.id; + return dto; + } +} + +export class ImportDTO { + id: string; + revision_id: string; + mime_type: string; + filename: string; + hash: string; + uploaded_at: Date; + type: string; + location: string; + + static fromImport(importEntity: Import): ImportDTO { + const dto = new ImportDTO(); + dto.id = importEntity.id; + dto.revision_id = importEntity.revision.id; + dto.mime_type = importEntity.mime_type; + dto.filename = importEntity.filename; + dto.hash = importEntity.hash; + dto.uploaded_at = importEntity.uploaded_at; + dto.type = importEntity.type; + dto.location = importEntity.location; + return dto; + } +} + +export class RevisionDTO { + id: string; + revision_index: number; + creation_date: Date; + previous_revision_id?: string; + online_cube_filename?: string; + publish_date?: Date; + approval_date?: Date; + approved_by?: string; + created_by: string; + imports: ImportDTO[]; + dataset_id?: string; + + static fromRevision(revision: RevisionEntity): RevisionDTO { + const dto = new RevisionDTO(); + dto.id = revision.id; + dto.revision_index = revision.revision_index; + dto.dataset_id = revision.dataset.id; + dto.creation_date = revision.creation_date; + dto.previous_revision_id = revision.previous_revision ? revision.previous_revision.id : undefined; + dto.online_cube_filename = revision.online_cube_filename; + dto.publish_date = revision.publish_date; + dto.approval_date = revision.approval_date; + dto.approved_by = revision.approved_by ? 
revision.approved_by.name : undefined; + dto.created_by = revision.created_by.name; + dto.imports = revision.imports.map((importEntity: Import) => { + return ImportDTO.fromImport(importEntity); + }); + return dto; + } +} + +export class DatasetDTO { + id: string; + creation_date: Date; + created_by: string; + live?: Date; + archive?: Date; + dimensions?: DimensionDTO[]; + revisions?: RevisionDTO[]; + datasetInfos?: DatasetInfoDTO[]; + + static fromDatasetComplete(dataset: Dataset): DatasetDTO { + const dto = new DatasetDTO(); + dto.id = dataset.id; + dto.creation_date = dataset.creation_date; + dto.created_by = dataset.created_by.name; + dto.live = dataset.live; + dto.archive = dataset.archive; + dto.datasetInfos = dataset.datasetInfos.map((datasetInfo: DatasetInfo) => { + const infoDto = new DatasetInfoDTO(); + infoDto.language = datasetInfo.language; + infoDto.title = datasetInfo.title; + infoDto.description = datasetInfo.description; + return infoDto; + }); + dto.dimensions = dataset.dimensions.map((dimension: Dimension) => { + const dimDto = new DimensionDTO(); + dimDto.id = dimension.id; + dimDto.type = dimension.type; + dimDto.start_revision_id = dimension.start_revision.id; + dimDto.finish_revision_id = dimension.finish_revision.id; + dimDto.validator = dimension.validator; + dimDto.dimensionInfos = dimension.dimensionInfos.map((dimInfo: DimensionInfo) => { + const infoDto = new DimensionInfoDTO(); + infoDto.language = dimInfo.language; + infoDto.name = dimInfo.name; + infoDto.description = dimInfo.description; + infoDto.notes = dimInfo.notes; + return infoDto; + }); + dimDto.sources = dimension.sources.map((source: Source) => { + const sourceDto = new SourceDTO(); + sourceDto.id = source.id; + sourceDto.import_id = source.import.id; + sourceDto.revision_id = source.revision.id; + sourceDto.csv_field = source.csv_field; + sourceDto.action = source.action; + return sourceDto; + }); + return dimDto; + }); + dto.revisions = dataset.revisions.map((revision: RevisionEntity) => { + const revDto = new RevisionDTO(); + revDto.id = revision.id; + revDto.revision_index = revision.revision_index; + revDto.dataset_id = revision.dataset.id; + revDto.creation_date = revision.creation_date; + revDto.previous_revision_id = revision.previous_revision.id; + revDto.online_cube_filename = revision.online_cube_filename; + revDto.publish_date = revision.publish_date; + revDto.approval_date = revision.approval_date; + revDto.approved_by = revision.approved_by.name; + revDto.created_by = revision.created_by.name; + revDto.imports = revision.imports.map((imp: Import) => { + const impDto = new ImportDTO(); + impDto.id = imp.id; + impDto.revision_id = imp.revision.id; + impDto.mime_type = imp.mime_type; + impDto.filename = imp.filename; + impDto.hash = imp.hash; + impDto.uploaded_at = imp.uploaded_at; + impDto.type = imp.type; + impDto.location = imp.location; + return impDto; + }); + return revDto; + }); + return dto; + } + + static fromDatasetWithDimensions(dataset: Dataset): DatasetDTO { + const dto = new DatasetDTO(); + dto.id = dataset.id; + dto.creation_date = dataset.creation_date; + dto.created_by = dataset.created_by.name; + dto.live = dataset.live; + dto.archive = dataset.archive; + dto.datasetInfos = dataset.datasetInfos.map((datasetInfo: DatasetInfo) => { + const infoDto = new DatasetInfoDTO(); + infoDto.language = datasetInfo.language; + infoDto.title = datasetInfo.title; + infoDto.description = datasetInfo.description; + return infoDto; + }); + dto.dimensions = dataset.dimensions.map((dimension: 
Dimension) => { + const dimDto = new DimensionDTO(); + dimDto.id = dimension.id; + dimDto.type = dimension.type; + dimDto.start_revision_id = dimension.start_revision.id; + dimDto.finish_revision_id = dimension.finish_revision.id; + dimDto.validator = dimension.validator; + dimDto.sources = dimension.sources.map((source: Source) => { + const sourceDto = new SourceDTO(); + sourceDto.id = source.id; + sourceDto.import_id = source.import.id; + sourceDto.revision_id = source.revision.id; + sourceDto.csv_field = source.csv_field; + sourceDto.action = source.action; + return sourceDto; + }); + return dimDto; + }); + dto.revisions = []; + return dto; + } + + static fromDatasetWithRevisions(dataset: Dataset): DatasetDTO { + const dto = new DatasetDTO(); + dto.id = dataset.id; + dto.creation_date = dataset.creation_date; + dto.created_by = dataset.created_by.name; + dto.live = dataset.live; + dto.archive = dataset.archive; + dto.datasetInfos = dataset.datasetInfos.map((datasetInfo: DatasetInfo) => { + const infoDto = new DatasetInfoDTO(); + infoDto.language = datasetInfo.language; + infoDto.title = datasetInfo.title; + infoDto.description = datasetInfo.description; + return infoDto; + }); + dto.dimensions = []; + dto.revisions = dataset.revisions.map((revision: RevisionEntity) => { + const revDto = new RevisionDTO(); + revDto.id = revision.id; + revDto.revision_index = revision.revision_index; + revDto.dataset_id = revision.dataset.id; + revDto.creation_date = revision.creation_date; + revDto.previous_revision_id = revision.previous_revision.id; + revDto.online_cube_filename = revision.online_cube_filename; + revDto.publish_date = revision.publish_date; + revDto.approval_date = revision.approval_date; + revDto.approved_by = revision.approved_by.name; + revDto.created_by = revision.created_by.name; + revDto.imports = []; + return revDto; + }); + return dto; + } + + static fromDatasetWithShallowDimensionsAndRevisions(dataset: Dataset): DatasetDTO { + const dto = new DatasetDTO(); + dto.id = dataset.id; + dto.creation_date = dataset.creation_date; + dto.created_by = dataset.created_by.name; + dto.live = dataset.live; + dto.archive = dataset.archive; + dto.datasetInfos = dataset.datasetInfos.map((datasetInfo: DatasetInfo) => { + const infoDto = new DatasetInfoDTO(); + infoDto.language = datasetInfo.language; + infoDto.title = datasetInfo.title; + infoDto.description = datasetInfo.description; + return infoDto; + }); + dto.dimensions = dataset.dimensions.map((dimension: Dimension) => { + const dimDto = new DimensionDTO(); + dimDto.id = dimension.id; + dimDto.type = dimension.type; + dimDto.start_revision_id = dimension.start_revision.id; + dimDto.finish_revision_id = dimension.finish_revision.id; + dimDto.validator = dimension.validator; + dimDto.dimensionInfos = dimension.dimensionInfos.map((dimInfo: DimensionInfo) => { + const infoDto = new DimensionInfoDTO(); + infoDto.language = dimInfo.language; + infoDto.name = dimInfo.name; + infoDto.description = dimInfo.description; + infoDto.notes = dimInfo.notes; + return infoDto; + }); + dimDto.sources = []; + return dimDto; + }); + dto.revisions = dataset.revisions.map((revision: RevisionEntity) => { + const revDto = new RevisionDTO(); + revDto.id = revision.id; + revDto.revision_index = revision.revision_index; + revDto.dataset_id = revision.dataset.id; + revDto.creation_date = revision.creation_date; + revDto.previous_revision_id = revision.previous_revision.id; + revDto.online_cube_filename = revision.online_cube_filename; + revDto.publish_date = 
revision.publish_date; + revDto.approval_date = revision.approval_date; + revDto.approved_by = revision.approved_by.name; + revDto.created_by = revision.created_by.name; + revDto.imports = []; + return revDto; + }); + return dto; + } + + static fromDatasetWithImports(dataset: Dataset): DatasetDTO { + const dto = new DatasetDTO(); + dto.id = dataset.id; + dto.creation_date = dataset.creation_date; + dto.created_by = dataset.created_by.name; + dto.live = dataset.live; + dto.archive = dataset.archive; + dto.datasetInfos = dataset.datasetInfos.map((datasetInfo: DatasetInfo) => { + const infoDto = new DatasetInfoDTO(); + infoDto.language = datasetInfo.language; + infoDto.title = datasetInfo.title; + infoDto.description = datasetInfo.description; + return infoDto; + }); + dto.dimensions = []; + dto.revisions = dataset.revisions.map((revision: RevisionEntity) => { + const revDto = new RevisionDTO(); + revDto.id = revision.id; + revDto.revision_index = revision.revision_index; + revDto.dataset_id = revision.dataset.id; + revDto.creation_date = revision.creation_date; + revDto.previous_revision_id = revision.previous_revision.id; + revDto.online_cube_filename = revision.online_cube_filename; + revDto.publish_date = revision.publish_date; + revDto.approval_date = revision.approval_date; + revDto.approved_by = revision.approved_by.name; + revDto.created_by = revision.created_by.name; + revDto.imports = revision.imports.map((imp: Import) => { + const impDto = new ImportDTO(); + impDto.id = imp.id; + impDto.revision_id = imp.revision.id; + impDto.mime_type = imp.mime_type; + impDto.filename = imp.filename; + impDto.hash = imp.hash; + impDto.uploaded_at = imp.uploaded_at; + impDto.type = imp.type; + impDto.location = imp.location; + return impDto; + }); + return revDto; + }); + return dto; + } + + // Returns a very shallow DTO with only the dataset info + static fromDatasetShallow(dataset: Dataset): DatasetDTO { + const dto = new DatasetDTO(); + dto.id = dataset.id; + dto.creation_date = dataset.creation_date; + dto.created_by = dataset.created_by.name; + dto.live = dataset.live; + dto.archive = dataset.archive; + dto.datasetInfos = dataset.datasetInfos.map((datasetInfo: DatasetInfo) => { + const infoDto = new DatasetInfoDTO(); + infoDto.language = datasetInfo.language; + infoDto.title = datasetInfo.title; + infoDto.description = datasetInfo.description; + return infoDto; + }); + dto.dimensions = []; + dto.revisions = []; + return dto; + } +} diff --git a/src/models/error.ts b/src/dtos2/error.ts similarity index 100% rename from src/models/error.ts rename to src/dtos2/error.ts diff --git a/src/dtos2/filelist.ts b/src/dtos2/filelist.ts new file mode 100644 index 0000000..9a8f889 --- /dev/null +++ b/src/dtos2/filelist.ts @@ -0,0 +1,13 @@ +export interface DatasetTitle { + language: string; + title: string; +} + +export interface FileDescription { + dataset_id: string; + titles: DatasetTitle[]; +} + +export interface FileList { + files: FileDescription[]; +} diff --git a/src/models/processedcsv.ts b/src/dtos2/processedcsv.ts similarity index 92% rename from src/models/processedcsv.ts rename to src/dtos2/processedcsv.ts index 13c0ff5..6000f2b 100644 --- a/src/models/processedcsv.ts +++ b/src/dtos2/processedcsv.ts @@ -1,5 +1,4 @@ -import { DatasetDTO } from '../dtos/dataset-dto'; - +import { DatasetDTO } from './dataset-dto'; import { Error } from './error'; export interface PageInfo { diff --git a/src/dtos/upload-dto.ts b/src/dtos2/upload-dto.ts similarity index 74% rename from src/dtos/upload-dto.ts rename 
to src/dtos2/upload-dto.ts index f3de05a..3cfc2e6 100644 --- a/src/dtos/upload-dto.ts +++ b/src/dtos2/upload-dto.ts @@ -1,6 +1,4 @@ -/* eslint-disable import/no-cycle */ -import { Error } from '../models/error'; - +import { Error } from './error'; import { DatasetDTO } from './dataset-dto'; export interface UploadDTO { diff --git a/src/dtos/view-dto.ts b/src/dtos2/view-dto.ts similarity index 81% rename from src/dtos/view-dto.ts rename to src/dtos2/view-dto.ts index f231965..8528200 100644 --- a/src/dtos/view-dto.ts +++ b/src/dtos2/view-dto.ts @@ -1,7 +1,5 @@ -/* eslint-disable import/no-cycle */ -import { Error } from '../models/error'; - -import { DatasetDTO } from './dataset-dto'; +import { Error } from './error'; +import { DatasetDTO, ImportDTO } from './dataset-dto'; export interface PageInfo { total_records: number | undefined; @@ -18,6 +16,7 @@ export interface ViewErrDTO { export interface ViewDTO { success: boolean; dataset: DatasetDTO; + import: ImportDTO; current_page: number; page_info: PageInfo; pages: Array; diff --git a/src/entity2/dataset.ts b/src/entity2/dataset.ts index afe959a..3d35af8 100644 --- a/src/entity2/dataset.ts +++ b/src/entity2/dataset.ts @@ -1,9 +1,12 @@ import { Entity, PrimaryGeneratedColumn, Column, BaseEntity, ManyToOne, OneToMany, JoinColumn } from 'typeorm'; import { User } from './user'; +// eslint-disable-next-line import/no-cycle import { RevisionEntity } from './revision'; // eslint-disable-next-line import/no-cycle import { DatasetInfo } from './dataset_info'; +// eslint-disable-next-line import/no-cycle +import { Dimension } from './dimension'; @Entity() export class Dataset extends BaseEntity { @@ -28,4 +31,7 @@ export class Dataset extends BaseEntity { @OneToMany(() => DatasetInfo, (datasetInfo) => datasetInfo.dataset) datasetInfos: DatasetInfo[]; + + @OneToMany(() => Dimension, (dimension) => dimension.dataset) + dimensions: Dimension[]; } diff --git a/src/entity2/dataset_info.ts b/src/entity2/dataset_info.ts index 60ab4d2..a3b642f 100644 --- a/src/entity2/dataset_info.ts +++ b/src/entity2/dataset_info.ts @@ -1,13 +1,14 @@ -import { Entity, PrimaryGeneratedColumn, Column, BaseEntity, ManyToOne, JoinColumn } from 'typeorm'; +import { Entity, PrimaryColumn, Column, BaseEntity, ManyToOne, JoinColumn } from 'typeorm'; // eslint-disable-next-line import/no-cycle import { Dataset } from './dataset'; @Entity() export class DatasetInfo extends BaseEntity { - @PrimaryGeneratedColumn('uuid') + @PrimaryColumn({ name: 'dataset_id' }) id: string; + @PrimaryColumn({ name: 'language' }) @Column({ type: 'varchar', length: 5, nullable: true }) language: string; diff --git a/src/entity2/dimension.ts b/src/entity2/dimension.ts index a1db7c5..ca6e6d1 100644 --- a/src/entity2/dimension.ts +++ b/src/entity2/dimension.ts @@ -1,9 +1,12 @@ import { Entity, PrimaryGeneratedColumn, Column, BaseEntity, ManyToOne, OneToMany, JoinColumn } from 'typeorm'; +// eslint-disable-next-line import/no-cycle import { Dataset } from './dataset'; +// eslint-disable-next-line import/no-cycle import { RevisionEntity } from './revision'; // eslint-disable-next-line import/no-cycle import { DimensionInfo } from './dimension_info'; +import { Source } from './source'; @Entity() export class Dimension extends BaseEntity { @@ -31,4 +34,7 @@ export class Dimension extends BaseEntity { @OneToMany(() => DimensionInfo, (dimensionInfo) => dimensionInfo.dimension) dimensionInfos: DimensionInfo[]; + + @OneToMany(() => Source, (source) => source.dimension) + sources: Source[]; } diff --git 
a/src/entity2/dimension_info.ts b/src/entity2/dimension_info.ts index fb973a2..49fbe8b 100644 --- a/src/entity2/dimension_info.ts +++ b/src/entity2/dimension_info.ts @@ -1,12 +1,14 @@ -import { Entity, PrimaryGeneratedColumn, Column, BaseEntity, ManyToOne, JoinColumn } from 'typeorm'; +import { Entity, PrimaryColumn, Column, BaseEntity, ManyToOne, JoinColumn } from 'typeorm'; +// eslint-disable-next-line import/no-cycle import { Dimension } from './dimension'; @Entity() export class DimensionInfo extends BaseEntity { - @PrimaryGeneratedColumn('uuid') + @PrimaryColumn({ name: 'dimension_id' }) id: string; + @PrimaryColumn({ name: 'language' }) @Column({ type: 'varchar', length: 5, nullable: true }) language: string; diff --git a/src/entity2/import.ts b/src/entity2/import.ts index 0b3eb29..bd97b78 100644 --- a/src/entity2/import.ts +++ b/src/entity2/import.ts @@ -1,5 +1,15 @@ -import { Entity, PrimaryGeneratedColumn, Column, BaseEntity, ManyToOne, OneToOne, JoinColumn } from 'typeorm'; +import { + Entity, + PrimaryGeneratedColumn, + Column, + BaseEntity, + OneToOne, + ManyToOne, + OneToMany, + JoinColumn +} from 'typeorm'; +// eslint-disable-next-line import/no-cycle import { RevisionEntity } from './revision'; // eslint-disable-next-line import/no-cycle import { CsvInfo } from './csv_info'; @@ -25,6 +35,18 @@ export class Import extends BaseEntity { @Column({ type: 'varchar', length: 255 }) filename: string; - @OneToOne(() => Source, (source) => source.import) - source: Source; + @Column({ type: 'varchar', length: 255 }) + hash: string; + + @Column({ type: 'timestamp', default: () => 'CURRENT_TIMESTAMP' }) + uploaded_at: Date; + + @Column({ type: 'enum', enum: ['Draft', 'FactTable', 'LookupTable'], nullable: false }) + type: string; + + @Column({ type: 'enum', enum: ['BlobStorage', 'Datalake'], nullable: false }) + location: string; + + @OneToMany(() => Source, (source) => source.import) + sources: Source[]; } diff --git a/src/entity2/revision.ts b/src/entity2/revision.ts index f6a78c5..ae02120 100644 --- a/src/entity2/revision.ts +++ b/src/entity2/revision.ts @@ -1,8 +1,10 @@ -import { Entity, PrimaryGeneratedColumn, Column, BaseEntity, JoinColumn, ManyToOne } from 'typeorm'; +import { Entity, PrimaryGeneratedColumn, Column, BaseEntity, JoinColumn, OneToMany, ManyToOne } from 'typeorm'; // eslint-disable-next-line import/no-cycle import { Dataset } from './dataset'; import { User } from './user'; +// eslint-disable-next-line import/no-cycle +import { Import } from './import'; interface Revision { id: string; @@ -15,6 +17,7 @@ interface Revision { approval_date: Date; approved_by: User; created_by: User; + imports: Import[]; } @Entity() @@ -45,6 +48,9 @@ export class RevisionEntity extends BaseEntity implements Revision { @Column({ type: 'timestamp', nullable: true }) approval_date: Date; + @OneToMany(() => Import, (importEntity) => importEntity.revision) + imports: Import[]; + @ManyToOne(() => User, { nullable: true }) @JoinColumn({ name: 'approved_by' }) approved_by: User; diff --git a/src/entity2/source.ts b/src/entity2/source.ts index 7337b0a..7c8a83c 100644 --- a/src/entity2/source.ts +++ b/src/entity2/source.ts @@ -1,5 +1,6 @@ -import { Entity, PrimaryGeneratedColumn, Column, BaseEntity, ManyToOne, OneToOne, JoinColumn } from 'typeorm'; +import { Entity, PrimaryGeneratedColumn, Column, BaseEntity, ManyToOne, JoinColumn } from 'typeorm'; +// eslint-disable-next-line import/no-cycle import { Dimension } from './dimension'; // eslint-disable-next-line import/no-cycle import { Import } 
from './import'; @@ -14,7 +15,7 @@ export class Source extends BaseEntity { @JoinColumn({ name: 'dimension_id' }) dimension: Dimension; - @OneToOne(() => Import, (importEntity) => importEntity.source, { nullable: false }) + @ManyToOne(() => Import, (importEntity) => importEntity.sources, { nullable: false }) @JoinColumn({ name: 'import_id' }) import: Import; @@ -22,9 +23,10 @@ export class Source extends BaseEntity { @JoinColumn({ name: 'revision_id' }) revision: RevisionEntity; - @ManyToOne(() => RevisionEntity) - @JoinColumn({ name: 'lookup_table_revision_id' }) - lookupTableRevision: RevisionEntity; + // Not implemented yet + // @ManyToOne(() => LookupTableRevision) + // @JoinColumn({ name: 'lookup_table_revision_id' }) + // lookupTableRevision: LookupTableRevision; @Column({ type: 'text' }) csv_field: string; diff --git a/src/entity2/user.ts b/src/entity2/user.ts index 3b5a634..48482b1 100644 --- a/src/entity2/user.ts +++ b/src/entity2/user.ts @@ -5,9 +5,6 @@ export class User extends BaseEntity { @PrimaryGeneratedColumn('uuid') id: string; - @Column({ unique: true }) - username: string; - @Column({ unique: true }) email: string; @@ -30,7 +27,10 @@ export class User extends BaseEntity { token_expiry: Date; @Column({ nullable: true }) - first_name: string; + name: string; + + @Column({ nullable: true }) + given_name: string; @Column({ nullable: true }) last_name: string; @@ -46,4 +46,24 @@ export class User extends BaseEntity { @Column({ type: 'boolean', default: true }) active: boolean; + + public static getTestUser(): User { + const user = new User(); + user.id = '12345678-1234-1234-1234-123456789012'; + user.email = 'test@test.com'; + user.oidc_subject = ''; + user.oidc_issuer = 'localAuth'; + user.access_token = ''; + user.refresh_token = ''; + user.id_token = ''; + user.token_expiry = new Date(); + user.name = 'Test User'; + user.given_name = 'Test'; + user.last_name = 'User'; + user.profile_picture = ''; + user.created_at = new Date(); + user.updated_at = new Date(); + user.active = true; + return user; + } } diff --git a/src/migration/1713284688846-dataset.ts b/src/migration/1713284688846-dataset.ts deleted file mode 100644 index da1a204..0000000 --- a/src/migration/1713284688846-dataset.ts +++ /dev/null @@ -1,29 +0,0 @@ -import { MigrationInterface, QueryRunner } from 'typeorm'; - -export class Dataset1713284688846 implements MigrationInterface { - public async up(queryRunner: QueryRunner): Promise { - await queryRunner.query( - ` - --Table Definition - CREATE TABLE IF NOT EXISTS "datasets" ( - "id" uuid NOT NULL DEFAULT gen_random_uuid(), - "internal_name" varchar(255) NOT NULL, - "creation_date" TIMESTAMP NOT NULL DEFAULT now(), - "created_by" varchar(255) NULL, - "last_modified" TIMESTAMP NOT NULL DEFAULT now(), - "modified_by" varchar(255) NULL, - "publish_date" TIMESTAMP NULL, - "published_by" varchar(255) null, - "live" BOOLEAN default false, - "approved_by" varchar(255) null, - "code" varchar(12) NULL, - CONSTRAINT "PK_dataset_id" PRIMARY KEY ("id") - ); - ` - ); - } - - public async down(queryRunner: QueryRunner): Promise { - await queryRunner.query(`DROP TABLE "dataset"`, undefined); - } -} diff --git a/src/migration/1716456713793-datafiles.ts b/src/migration/1716456713793-datafiles.ts deleted file mode 100644 index 958962b..0000000 --- a/src/migration/1716456713793-datafiles.ts +++ /dev/null @@ -1,26 +0,0 @@ -import { MigrationInterface, QueryRunner } from 'typeorm'; - -export class Datafiles1716456713793 implements MigrationInterface { - public async up(queryRunner: 
QueryRunner): Promise { - await queryRunner.query( - ` - CREATE TABLE IF NOT EXISTS "datafiles" ( - "id" uuid NOT NULL DEFAULT gen_random_uuid(), - "sha256hash" varchar(255) NOT NULL, - "draft" boolean NOT NULL DEFAULT true, - "dataset_id" uuid NOT NULL, - "creation_date" TIMESTAMP NOT NULL DEFAULT now(), - "created_by" varchar(255) NULL, - "last_modified" TIMESTAMP NOT NULL DEFAULT now(), - "modified_by" varchar(255) NULL, - CONSTRAINT "PK_datafiles_id" PRIMARY KEY ("id"), - CONSTRAINT "FK_dataset_to_datafile_dataset" FOREIGN KEY(dataset_id) REFERENCES datasets(id) - ) - ` - ); - } - - public async down(queryRunner: QueryRunner): Promise { - await queryRunner.query(`DROP TABLE "datafiles"`, undefined); - } -} diff --git a/src/migration/1716456733681-dataset_title.ts b/src/migration/1716456733681-dataset_title.ts deleted file mode 100644 index ca947b6..0000000 --- a/src/migration/1716456733681-dataset_title.ts +++ /dev/null @@ -1,21 +0,0 @@ -import { MigrationInterface, QueryRunner } from 'typeorm'; - -export class DatasetTitle1716456733681 implements MigrationInterface { - public async up(queryRunner: QueryRunner): Promise { - await queryRunner.query( - ` - CREATE TABLE IF NOT EXISTS "dataset_title" ( - "dataset_id" uuid NOT NULL, - "title" varchar(4096) NOT NULL, - "language_code" char(5) NOT NULL, - CONSTRAINT "PK_dataset_languageCode" PRIMARY KEY ("dataset_id", "language_code"), - CONSTRAINT "FK_dataset_to_dataset_name_dataset" FOREIGN KEY(dataset_id) REFERENCES datasets(id) - ); - ` - ); - } - - public async down(queryRunner: QueryRunner): Promise { - await queryRunner.query(`DROP TABLE "dataset_title"`, undefined); - } -} diff --git a/src/migration/1716456738219-dataset_description.ts b/src/migration/1716456738219-dataset_description.ts deleted file mode 100644 index ca8629b..0000000 --- a/src/migration/1716456738219-dataset_description.ts +++ /dev/null @@ -1,21 +0,0 @@ -import { MigrationInterface, QueryRunner } from 'typeorm'; - -export class DatasetDescription1716456738219 implements MigrationInterface { - public async up(queryRunner: QueryRunner): Promise { - await queryRunner.query( - ` - CREATE TABLE IF NOT EXISTS "dataset_description" ( - "dataset_id" uuid NOT NULL, - "description" varchar(4096) NOT NULL, - "language_code" char(5) NOT NULL, - CONSTRAINT "PK_description_dataset_languageCode" PRIMARY KEY ("dataset_id", "language_code"), - CONSTRAINT "FK_dataset_to_dataset_description_dataset" FOREIGN KEY(dataset_id) REFERENCES datasets(id) - ); - ` - ); - } - - public async down(queryRunner: QueryRunner): Promise { - await queryRunner.query(`DROP TABLE "dataset_description"`, undefined); - } -} diff --git a/src/migration/1716542997594-dataset_column.ts b/src/migration/1716542997594-dataset_column.ts deleted file mode 100644 index 635e473..0000000 --- a/src/migration/1716542997594-dataset_column.ts +++ /dev/null @@ -1,22 +0,0 @@ -import { MigrationInterface, QueryRunner } from 'typeorm'; - -export class DatasetColumn1716542997594 implements MigrationInterface { - public async up(queryRunner: QueryRunner): Promise { - await queryRunner.query( - ` - CREATE TABLE IF NOT EXISTS "dataset_column" ( - "dataset_id" uuid NOT NULL, - "id" uuid NOT NULL DEFAULT gen_random_uuid(), - "csv_title" varchar(255) NOT NULL, - "type" varchar(10) NOT NULL, - CONSTRAINT "PK_name_dataset_column_id" PRIMARY KEY ("id"), - CONSTRAINT "FK_dataset_to_dataset_column_dataset" FOREIGN KEY(dataset_id) REFERENCES datasets(id) - ); - ` - ); - } - - public async down(queryRunner: QueryRunner): Promise { - await 
queryRunner.query(`DROP TABLE "dataset_column"`, undefined); - } -} diff --git a/src/migration/1716542997596-lookup_table.ts b/src/migration/1716542997596-lookup_table.ts deleted file mode 100644 index d825d0a..0000000 --- a/src/migration/1716542997596-lookup_table.ts +++ /dev/null @@ -1,27 +0,0 @@ -import { MigrationInterface, QueryRunner } from 'typeorm'; - -export class LookupTable1716542997596 implements MigrationInterface { - public async up(queryRunner: QueryRunner): Promise { - await queryRunner.query( - ` - CREATE TABLE IF NOT EXISTS "lookup_tables" ( - "id" uuid NOT NULL DEFAULT gen_random_uuid(), - "sha256hash" varchar(255) NOT NULL, - "dataset_id" uuid NOT NULL, - "creation_date" TIMESTAMP NOT NULL DEFAULT now(), - "created_by" varchar(255) NULL, - "last_modified" TIMESTAMP NOT NULL DEFAULT now(), - "modified_by" varchar(255) NULL, - "dataset_column_id" uuid NULL, - CONSTRAINT "PK_lookup_tables_id" PRIMARY KEY ("id"), - CONSTRAINT "FK_dataset_to_lookup_tables_dataset" FOREIGN KEY(dataset_id) REFERENCES datasets(id), - CONSTRAINT "FK_dataset_column_id_to_lookup_tables_dataset_column_id" FOREIGN KEY(dataset_column_id) REFERENCES dataset_column(id) - ); - ` - ); - } - - public async down(queryRunner: QueryRunner): Promise { - await queryRunner.query(`DROP TABLE "lookup_tbles"`, undefined); - } -} diff --git a/src/migration/1716543109797-column_title.ts b/src/migration/1716543109797-column_title.ts deleted file mode 100644 index f2b14fd..0000000 --- a/src/migration/1716543109797-column_title.ts +++ /dev/null @@ -1,21 +0,0 @@ -import { MigrationInterface, QueryRunner } from 'typeorm'; - -export class ColumnTitle1716543109797 implements MigrationInterface { - public async up(queryRunner: QueryRunner): Promise { - await queryRunner.query( - ` - CREATE TABLE IF NOT EXISTS "column_title" ( - "dataset_column_id" uuid NOT NULL, - "title" varchar(4096) NOT NULL, - "language_code" varchar(2) NOT NULL, - CONSTRAINT "PK_name_datasetid_languageCode" PRIMARY KEY ("dataset_column_id", "language_code"), - CONSTRAINT "FK_column_id_to_column_title_column" FOREIGN KEY(dataset_column_id) REFERENCES dataset_column(id) - ); - ` - ); - } - - public async down(queryRunner: QueryRunner): Promise { - await queryRunner.query(`DROP TABLE "column_title"`, undefined); - } -} diff --git a/src/migration/1723729297617-migration.ts b/src/migration/1723729297617-migration.ts new file mode 100644 index 0000000..45c9bb0 --- /dev/null +++ b/src/migration/1723729297617-migration.ts @@ -0,0 +1,128 @@ +import { MigrationInterface, QueryRunner } from 'typeorm'; + +export class Migration1723729297617 implements MigrationInterface { + public async up(queryRunner: QueryRunner): Promise { + await queryRunner.query(` + CREATE TABLE users ( + id UUID PRIMARY KEY, + username VARCHAR(255) NOT NULL UNIQUE, + email VARCHAR(255) NOT NULL UNIQUE, + oidc_subject VARCHAR(255) UNIQUE, + oidc_issuer VARCHAR(255), + access_token TEXT, + refresh_token TEXT, + id_token TEXT, + token_expiry TIMESTAMP, + first_name VARCHAR(255), + last_name VARCHAR(255), + profile_picture VARCHAR(255), + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + active BOOLEAN NOT NULL DEFAULT true + ); + + CREATE TABLE dataset ( + id UUID PRIMARY KEY, + creation_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + created_by UUID, + live TIMESTAMP, + archive TIMESTAMP, + FOREIGN KEY (created_by) REFERENCES users(id) + ); + + CREATE TABLE dataset_info ( + id UUID PRIMARY KEY, + dataset_id UUID, + 
language VARCHAR(5), + title TEXT, + description TEXT, + FOREIGN KEY (dataset_id) REFERENCES dataset(id) ON DELETE CASCADE + ); + + CREATE TABLE dimension_info ( + id UUID PRIMARY KEY, + dimension_id UUID, + language VARCHAR(5), + name TEXT, + description TEXT, + notes TEXT, + FOREIGN KEY (dimension_id) REFERENCES dimension(id) ON DELETE CASCADE + ); + + CREATE TABLE revision ( + id UUID PRIMARY KEY, + revision_index INT, + dataset_id UUID, + creation_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + previous_revision_id UUID, + online_cube_filename VARCHAR(255), + publish_date TIMESTAMP, + approval_date TIMESTAMP, + approved_by UUID, + created_by UUID, + FOREIGN KEY (dataset_id) REFERENCES dataset(id) ON DELETE CASCADE, + FOREIGN KEY (previous_revision_id) REFERENCES revision(id) ON DELETE SET NULL, + FOREIGN KEY (approved_by) REFERENCES users(id), + FOREIGN KEY (created_by) REFERENCES users(id) + ); + + CREATE TABLE dimension ( + id UUID PRIMARY KEY, + dataset_id UUID, + type VARCHAR(255) NOT NULL, + start_revision_id UUID NOT NULL, + finish_revision_id UUID, + validator TEXT, + FOREIGN KEY (dataset_id) REFERENCES dataset(id) ON DELETE CASCADE, + FOREIGN KEY (start_revision_id) REFERENCES revision(id) ON DELETE CASCADE, + FOREIGN KEY (finish_revision_id) REFERENCES revision(id) ON DELETE SET NULL + ); + + CREATE TABLE csv_info ( + import_id UUID PRIMARY KEY, + delimiter CHAR(1), + quote CHAR(1), + linebreak VARCHAR(2), + FOREIGN KEY (import_id) REFERENCES import(id) ON DELETE CASCADE + ); + + CREATE TABLE import ( + id UUID PRIMARY KEY, + revision_id UUID, + csv_info UUID UNIQUE, + mime_type VARCHAR(255), + filename VARCHAR(255), + FOREIGN KEY (revision_id) REFERENCES revision(id) ON DELETE CASCADE, + FOREIGN KEY (csv_info) REFERENCES csv_info(import_id) ON DELETE CASCADE + ); + + CREATE TABLE source ( + id UUID PRIMARY KEY, + dimension_id UUID, + import_id UUID UNIQUE, + revision_id UUID, + lookup_table_revision_id UUID, + csv_field TEXT, + action VARCHAR(255) NOT NULL, + FOREIGN KEY (dimension_id) REFERENCES dimension(id) ON DELETE CASCADE, + FOREIGN KEY (import_id) REFERENCES import(id) ON DELETE CASCADE, + FOREIGN KEY (revision_id) REFERENCES revision(id) ON DELETE CASCADE, + FOREIGN KEY (lookup_table_revision_id) REFERENCES revision(id) ON DELETE SET NULL + ); + `); + } + + public async down(queryRunner: QueryRunner): Promise { + await queryRunner.query(` + DROP TABLE source; + DROP TABLE import; + DROP TABLE csv_info; + DROP TABLE dimension; + DROP TABLE revision; + DROP TABLE dimension_info; + DROP TABLE dataset_info; + DROP TABLE dataset; + DROP TABLE users; + `); + } +} diff --git a/src/models/filelist.ts b/src/models/filelist.ts deleted file mode 100644 index 522c4ba..0000000 --- a/src/models/filelist.ts +++ /dev/null @@ -1,8 +0,0 @@ -export interface FileDescription { - id: string; - internal_name: string; -} - -export interface FileList { - files: FileDescription[]; -} diff --git a/src/route/dataset-route.ts b/src/route/dataset-route.ts index 67973f5..a5ff5e5 100644 --- a/src/route/dataset-route.ts +++ b/src/route/dataset-route.ts @@ -4,7 +4,7 @@ import { Request, Response, Router } from 'express'; import multer from 'multer'; import pino from 'pino'; -import { ViewErrDTO } from '../dtos/view-dto'; +import { ViewErrDTO, ViewDTO } from '../dtos2/view-dto'; import { ENGLISH, WELSH, t } from '../app'; import { processCSVFromDatalake, @@ -12,13 +12,14 @@ import { uploadCSVToBlobStorage, DEFAULT_PAGE_SIZE } from '../controllers/csv-processor'; -import { DataLakeService } from 
'../controllers/datalake'; import { Dataset } from '../entity2/dataset'; import { DatasetInfo } from '../entity2/dataset_info'; -import { DatasetRevision } from '../entity2/revision'; +import { Dimension } from '../entity2/dimension'; +import { RevisionEntity } from '../entity2/revision'; import { Import } from '../entity2/import'; -import { FileDescription } from '../models/filelist'; -import { datasetToDatasetDTO } from '../dtos/dataset-dto'; +import { User } from '../entity2/user'; +import { DatasetTitle, FileDescription } from '../dtos2/filelist'; +import { DatasetDTO, DimensionDTO, RevisionDTO } from '../dtos2/dataset-dto'; export const logger = pino({ name: 'StatsWales-Alpha-App: DatasetRoute', @@ -29,25 +30,74 @@ const storage = multer.memoryStorage(); const upload = multer({ storage }); export const apiRoute = Router(); +const DATASET = 'Dataset'; +const REVISION = 'Revision'; +const DIMENSION = 'Dimension'; +const IMPORT = 'Import'; + function isValidUUID(uuid: string): boolean { const uuidRegex = /^[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/i; return uuid.length === 36 && uuidRegex.test(uuid); } -function checkDatasetID(datasetID: string, res: Response): boolean { - if (datasetID === undefined || datasetID === null) { +function validateIds(id: string, idType: string, res: Response): boolean { + if (id === undefined || id === null) { res.status(400); - res.json({ message: 'Dataset ID is null or undefined' }); + res.json({ message: `${idType} ID is null or undefined` }); return false; } - if (isValidUUID(datasetID) === false) { + if (isValidUUID(id) === false) { res.status(400); - res.json({ message: 'Dataset ID is not valid' }); + res.json({ message: `${idType} ID is not valid` }); return false; } return true; } +async function validateDataset(datasetID: string, res: Response): Promise { + if (!validateIds(datasetID, DATASET, res)) return null; + const dataset = await Dataset.findOneBy({ id: datasetID }); + if (!dataset) { + res.status(404); + res.json({ message: 'Dataset not found.' }); + return null; + } + return dataset; +} + +async function validateDimension(dimensionID: string, res: Response): Promise { + if (!validateIds(dimensionID, DIMENSION, res)) return null; + const dimension = await Dimension.findOneBy({ id: dimensionID }); + if (!dimension) { + res.status(404); + res.json({ message: 'Dimension not found.' }); + return null; + } + return dimension; +} + +async function validateRevision(revisionID: string, res: Response): Promise { + if (!validateIds(revisionID, REVISION, res)) return null; + const revision = await RevisionEntity.findOneBy({ id: revisionID }); + if (!revision) { + res.status(404); + res.json({ message: 'Revision not found.' }); + return null; + } + return revision; +} + +async function validateImport(importID: string, res: Response): Promise { + if (!validateIds(importID, IMPORT, res)) return null; + const importObj = await Import.findOneBy({ id: importID }); + if (!importObj) { + res.status(404); + res.json({ message: 'Import not found.' }); + return null; + } + return importObj; +} + function errorDtoGenerator( field: string, translationString: string, @@ -78,7 +128,10 @@ function errorDtoGenerator( }; } - +// POST /api/dataset +// Upload a CSV file to the server +// Returns a JSON object with the a DTO object that represents the dataset +// first revision and the import record. 
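+// Illustrative usage: the client sends multipart form data with the CSV under the 'csv' field
+// configured in multer above; the handler streams the file to blob storage, creates the Dataset,
+// DatasetInfo and first Revision records, and responds with a DTO for the saved dataset.
+// Example request (the host and the title/language form fields are assumptions):
+//   curl -F 'csv=@data.csv' -F 'title=My dataset' -F 'language=en-GB' http://localhost:3000/api/dataset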
apiRoute.post('/', upload.single('csv'), async (req: Request, res: Response) => { if (!req.file) { res.status(400); @@ -96,7 +149,7 @@ apiRoute.post('/', upload.single('csv'), async (req: Request, res: Response) => try { importRecord = await uploadCSVToBlobStorage(req.file?.stream, req.file?.mimetype); } catch (err) { - logger.error(`An error occured trying to upload the file with the following error: ${e}`); + logger.error(`An error occured trying to upload the file with the following error: ${err}`); res.status(500); res.json({ message: 'Error uploading file' }); return; @@ -106,128 +159,137 @@ apiRoute.post('/', upload.single('csv'), async (req: Request, res: Response) => const dataset = new Dataset(); dataset.creation_date = new Date(); // TODO change how we handle authentication to get the user on the Backend - dataset.created_by = 'Test User'; - const saved_dataset_record = await dataset.save(); + // We are using a stub test user for all requests at the moment + dataset.created_by = User.getTestUser(); const datasetInfo = new DatasetInfo(); datasetInfo.language = lang; datasetInfo.title = title; - datasetInfo.dataset = saved_dataset_record; - datasetInfo.save(); - const revision = new DatasetRevision(); - revision.dataset = saved_dataset_record; + datasetInfo.dataset = dataset; + dataset.datasetInfos = [datasetInfo]; + const revision = new RevisionEntity(); + revision.dataset = dataset; revision.revision_index = 1; revision.creation_date = new Date(); // TODO change how we handle authentication to get the user on the Backend - revision.created_by = 'Test User'; - const saved_revision_record = await revision.save(); - importRecord.revision = saved_revision_record; - importRecord.save(); - + revision.created_by = User.getTestUser(); + importRecord.revision = revision; + revision.imports = [importRecord]; + const savedDataset = await dataset.save(); + const uploadDTO = DatasetDTO.fromDatasetWithImports(savedDataset); res.json(uploadDTO); }); -apiRoute.get('/', async (req, res) => { +// GET /api/dataset +// Returns a list of all datasets +// Returns a JSON object with a list of all datasets +// and their titles +apiRoute.get('/', async (req: Request, res: Response) => { const datasets = await Dataset.find(); const fileList: FileDescription[] = []; for (const dataset of datasets) { + const titles: DatasetTitle[] = []; + for (const datasetInfo of dataset.datasetInfos) { + titles.push({ + title: datasetInfo.title, + language: datasetInfo.language + }); + } fileList.push({ - internal_name: dataset.internalName, - id: dataset.id + titles, + dataset_id: dataset.id }); } res.json({ filelist: fileList }); }); -apiRoute.get('/:dataset', async (req, res) => { - const datasetID: string = req.params.dataset; - if (!checkDatasetID(datasetID, res)) return; - const dataset = await Dataset.findOneBy({ id: datasetID }); - if (!dataset) { - res.status(404); - res.json({ message: 'Dataset not found.' 
});
-        return;
-    }
-    const datafiles = await dataset.datafiles;
-    if (datafiles.length < 1) {
-        res.status(404);
-        res.json({ message: 'Dataset has no datafiles attached' });
-        return;
-    }
-    const dto = await datasetToDatasetDTO(dataset);
+// GET /api/dataset/:dataset_id
+// Returns a shallow DTO of the dataset with the given ID
+// Shallow gives the revisions and dimensions of the dataset only
+apiRoute.get('/:dataset_id', async (req: Request, res: Response) => {
+    const datasetID: string = req.params.dataset_id;
+    const dataset = await validateDataset(datasetID, res);
+    if (!dataset) return;
+    const dto = DatasetDTO.fromDatasetWithShallowDimensionsAndRevisions(dataset);
     res.json(dto);
 });
-apiRoute.get('/:dataset/csv', async (req, res) => {
-    const dataLakeService = new DataLakeService();
-    const datasetID = req.params.dataset;
-    if (!checkDatasetID(datasetID, res)) return;
-    const dataset = await Dataset.findOneBy({ id: datasetID });
-    if (dataset === undefined || dataset === null) {
-        res.status(404);
-        res.json({ message: 'Dataset not found... Dataset ID not found in Database' });
-        return;
-    }
-    const datafiles = await dataset.datafiles;
-    const fileToDownload: Datafile | undefined = datafiles
-        .sort(
-            (first: Datafile, second: Datafile) =>
-                new Date(second.creationDate).getTime() - new Date(first.creationDate).getTime()
-        )
-        .shift();
-    if (fileToDownload === undefined || fileToDownload === null) {
-        res.status(404);
-        res.json({ message: 'Dataset has no file attached' });
-        return;
-    }
-    const file = await dataLakeService.downloadFile(`${fileToDownload.id}.csv`);
-    if (file === undefined || file === null) {
-        res.status(404);
-        res.json({ message: 'File not found... file is null or undefined' });
-        return;
-    }
-    res.setHeader('Content-Length', file.length);
-    res.setHeader('Content-Type', 'text/csv');
-    res.setHeader('Content-Disposition', `attachment; filename=${fileToDownload.id}.csv`);
-    res.write(file, 'binary');
-    res.end();
+// GET /api/dataset/:dataset_id/dimension/by-id/:dimension_id
+// Returns details of a dimension with its sources and imports
+apiRoute.get('/:dataset_id/dimension/by-id/:dimension_id', async (req: Request, res: Response) => {
+    const datasetID: string = req.params.dataset_id;
+    const dataset = await validateDataset(datasetID, res);
+    if (!dataset) return;
+    const dimensionID: string = req.params.dimension_id;
+    const dimension = await validateDimension(dimensionID, res);
+    if (!dimension) return;
+    const dto = DimensionDTO.fromDimension(dimension);
+    res.json(dto);
 });
-apiRoute.get('/:dataset/preview', async (req, res) => {
-    const datasetID = req.params.dataset;
-    if (!checkDatasetID(datasetID, res)) return;
-    const dataset = await Dataset.findOneBy({ id: datasetID });
-    if (dataset === undefined || dataset === null) {
-        res.status(404);
-        res.json({ message: 'Dataset not found... Dataset ID not found in Database' });
-        return;
-    }
-    const page_number_str: string = req.query.page_number || req.body?.page_number;
-    const page_size_str: string = req.query.page_size || req.body?.page_size;
-    const page_number: number = Number.parseInt(page_number_str, 10) || 1;
-    const page_size: number = Number.parseInt(page_size_str, 10) || DEFAULT_PAGE_SIZE;
-    const processedCSV = await processCSVFromBlobStorage(dataset, page_number, page_size);
-    if (!processedCSV.success) {
-        res.status(500);
-    }
-    res.json(processedCSV);
+// GET /api/dataset/:dataset_id/revision/by-id/:revision_id
+// Returns details of a revision with its imports
+apiRoute.get('/:dataset_id/revision/by-id/:revision_id', async (req: Request, res: Response) => {
+    const datasetID: string = req.params.dataset_id;
+    const dataset = await validateDataset(datasetID, res);
+    if (!dataset) return;
+    const revisionID: string = req.params.revision_id;
+    const revision = await validateRevision(revisionID, res);
+    if (!revision) return;
+    const dto = RevisionDTO.fromRevision(revision);
+    res.json(dto);
 });
-apiRoute.get('/:dataset/view', async (req, res) => {
-    const datasetID = req.params.dataset;
-    if (!checkDatasetID(datasetID, res)) return;
-    const dataset = await Dataset.findOneBy({ id: datasetID });
-    if (dataset === undefined || dataset === null) {
-        res.status(404);
-        res.json({ message: 'Dataset not found... Dataset ID not found in Database' });
-        return;
+// GET /api/dataset/:dataset_id/revision/by-id/:revision_id/import/by-id/:import_id/preview
+// Returns a view of the data file attached to the import
+apiRoute.get(
+    '/:dataset_id/revision/by-id/:revision_id/import/by-id/:import_id/preview',
+    async (req: Request, res: Response) => {
+        const datasetID: string = req.params.dataset_id;
+        const dataset = await validateDataset(datasetID, res);
+        if (!dataset) return;
+        const revisionID: string = req.params.revision_id;
+        const revision = await validateRevision(revisionID, res);
+        if (!revision) return;
+        const importID: string = req.params.import_id;
+        const importRecord = await validateImport(importID, res);
+        if (!importRecord) return;
+        const page_number_str: string = req.query.page_number || req.body?.page_number;
+        const page_size_str: string = req.query.page_size || req.body?.page_size;
+        const page_number: number = Number.parseInt(page_number_str, 10) || 1;
+        const page_size: number = Number.parseInt(page_size_str, 10) || DEFAULT_PAGE_SIZE;
+        let processedCSV: ViewErrDTO | ViewDTO;
+        if (importRecord.location === 'BlobStorage') {
+            processedCSV = await processCSVFromBlobStorage(dataset, importRecord, page_number, page_size);
+        } else if (importRecord.location === 'Datalake') {
+            processedCSV = await processCSVFromDatalake(dataset, importRecord, page_number, page_size);
+        } else {
+            res.status(500);
+            res.json({ message: 'Import location not supported.'
}); + return; + } + if (!processedCSV.success) { + res.status(500); + } + res.json(processedCSV); } - const page_number_str: string = req.query.page_number || req.body?.page_number; - const page_size_str: string = req.query.page_size || req.body?.page_size; - const page_number: number = Number.parseInt(page_number_str, 10) || 1; - const page_size: number = Number.parseInt(page_size_str, 10) || DEFAULT_PAGE_SIZE; - const processedCSV = await processCSVFromDatalake(dataset, page_number, page_size); - if (!processedCSV.success) { - res.status(500); - } - res.json(processedCSV); -}); +); + +// apiRoute.get('/:dataset/view', async (req: Request, res: Response) => { +// const datasetID = req.params.dataset; +// if (!checkDatasetID(datasetID, res)) return; +// const dataset = await Dataset.findOneBy({ id: datasetID }); +// if (dataset === undefined || dataset === null) { +// res.status(404); +// res.json({ message: 'Dataset not found... Dataset ID not found in Database' }); +// return; +// } +// const page_number_str: string = req.query.page_number || req.body?.page_number; +// const page_size_str: string = req.query.page_size || req.body?.page_size; +// const page_number: number = Number.parseInt(page_number_str, 10) || 1; +// const page_size: number = Number.parseInt(page_size_str, 10) || DEFAULT_PAGE_SIZE; +// const processedCSV = await processCSVFromDatalake(dataset, page_number, page_size); +// if (!processedCSV.success) { +// res.status(500); +// } +// res.json(processedCSV); +// }); diff --git a/test/dataset.test.ts b/test/dataset.test.ts index 7c9c54e..7db7d19 100644 --- a/test/dataset.test.ts +++ b/test/dataset.test.ts @@ -9,8 +9,8 @@ import { BlobStorageService } from '../src/controllers/blob-storage'; import app, { ENGLISH, WELSH, t, dbManager, connectToDb } from '../src/app'; import { Dataset } from '../src/entity/dataset'; import { Datafile } from '../src/entity/datafile'; -import { datasetToDatasetDTO } from '../src/dtos/dataset-dto'; -import { ViewErrDTO } from '../src/dtos/view-dto'; +import { DatasetDTO } from '../src/dtos2/dataset-dto'; +import { ViewErrDTO } from '../src/dtos2/view-dto'; import { MAX_PAGE_SIZE, MIN_PAGE_SIZE } from '../src/controllers/csv-processor'; import { datasourceOptions } from './test-data-source'; From ee5ec6baf030d1a86eef23cc60d990f8634e500e Mon Sep 17 00:00:00 2001 From: Jamie Maynard Date: Wed, 4 Sep 2024 00:15:43 +0100 Subject: [PATCH 3/5] Almost completely reimplements the backend This now makes better use of streams. It completes the use of the new data model with end points for viewing the dataset and getting a preview of the uploaded datafile. 
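As a rough sketch of the new preview flow (illustrative only; the host, mount point and IDs
below are placeholder assumptions, and page_number/page_size fall back to the defaults in
csv-processor when omitted), a client would fetch a preview of an uploaded datafile like this:

    // Placeholder IDs; in practice they come from the DatasetDTO returned when the CSV is uploaded.
    const base = 'http://localhost:3000/api/dataset'; // assumed mount point for apiRoute
    const datasetId = '00000000-0000-0000-0000-000000000000';
    const revisionId = '00000000-0000-0000-0000-000000000001';
    const importId = '00000000-0000-0000-0000-000000000002';
    const res = await fetch(
        `${base}/${datasetId}/revision/by-id/${revisionId}/import/by-id/${importId}/preview?page_number=1&page_size=100`
    );
    const view = await res.json(); // ViewDTO with headers and data on success, ViewErrDTO otherwise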
--- .eslintrc | 3 +- .prettierrc | 5 +- src/app.ts | 2 +- src/controllers/blob-storage.ts | 14 +- src/controllers/csv-processor.ts | 65 ++- src/controllers/datalake.ts | 10 + src/data-source.ts | 2 +- src/database-manager.ts | 19 +- src/dtos2/dataset-dto.ts | 507 ++++++++++++----------- src/dtos2/view-dto.ts | 7 + src/entity/column_title.ts | 28 -- src/entity/datafile.ts | 36 -- src/entity/dataset.ts | 124 ------ src/entity/dataset_column.ts | 37 -- src/entity/dataset_description.ts | 33 -- src/entity/dataset_title.ts | 28 -- src/entity/lookuptable.ts | 33 -- src/entity2/csv_info.ts | 17 +- src/entity2/dataset.ts | 32 +- src/entity2/dataset_info.ts | 12 +- src/entity2/dimension.ts | 48 ++- src/entity2/dimension_info.ts | 12 +- src/entity2/dimension_types.ts | 11 + src/entity2/import.ts | 46 +- src/entity2/revision.ts | 51 ++- src/entity2/source.ts | 33 +- src/entity2/{user.ts => users.ts} | 16 +- src/migration/1723729297617-migration.ts | 109 +++-- src/route/dataset-route.ts | 120 ++++-- src/server.ts | 4 +- test/dataset.test.ts | 332 ++++++++++----- test/test-data-source.ts | 18 +- 32 files changed, 959 insertions(+), 855 deletions(-) delete mode 100644 src/entity/column_title.ts delete mode 100644 src/entity/datafile.ts delete mode 100644 src/entity/dataset.ts delete mode 100644 src/entity/dataset_column.ts delete mode 100644 src/entity/dataset_description.ts delete mode 100644 src/entity/dataset_title.ts delete mode 100644 src/entity/lookuptable.ts create mode 100644 src/entity2/dimension_types.ts rename src/entity2/{user.ts => users.ts} (74%) diff --git a/.eslintrc b/.eslintrc index 9ec23c3..dcbf82e 100644 --- a/.eslintrc +++ b/.eslintrc @@ -22,7 +22,8 @@ "prettier/prettier": 2, "no-console": 0, "no-process-env": 0, - "no-inline-comments": 1 + "no-inline-comments": 0, + "line-comment-position": 0 }, "globals": { "NodeJS": true diff --git a/.prettierrc b/.prettierrc index 0706fd1..9c4858d 100644 --- a/.prettierrc +++ b/.prettierrc @@ -2,5 +2,8 @@ "semi": true, "trailingComma": "none", "singleQuote": true, - "printWidth": 120 + "printWidth": 120, + "rules": { + "no-inline-comments": "off" + } } diff --git a/src/app.ts b/src/app.ts index c5e55a6..fb2630f 100644 --- a/src/app.ts +++ b/src/app.ts @@ -22,7 +22,7 @@ export const logger: Logger = pino({ }); // Database handling and export -export const connectToDb = async (datasourceOptions: DataSourceOptions) => { +export const databaseManager = async (datasourceOptions: DataSourceOptions) => { dbManager = new DatabaseManager(datasourceOptions, logger); await dbManager.initializeDataSource(); }; diff --git a/src/controllers/blob-storage.ts b/src/controllers/blob-storage.ts index 1cd9943..72e0c0b 100644 --- a/src/controllers/blob-storage.ts +++ b/src/controllers/blob-storage.ts @@ -59,9 +59,14 @@ export class BlobStorageService { const blockBlobClient = this.containerClient.getBlockBlobClient(fileName); + const uploadOptions = { + bufferSize: 4 * 1024 * 1024, // 4MB buffer size + maxBuffers: 5 // Parallelism of 5 + }; const uploadBlobResponse: BlobUploadCommonResponse = await blockBlobClient.uploadStream( fileContent, - fileContent.readableLength + uploadOptions.bufferSize, + uploadOptions.maxBuffers ); return uploadBlobResponse; } @@ -100,12 +105,7 @@ export class BlobStorageService { public async getReadableStream(fileName: string) { const blockBlobClient = this.containerClient.getBlockBlobClient(fileName); const downloadBlockBlobResponse = await blockBlobClient.download(); - const readableStreamBody: ReadableStream | undefined = 
downloadBlockBlobResponse.readableStreamBody; - - if (!readableStreamBody) { - throw new Error('Failed to get readable stream body from download response.'); - } - return readableStreamBody; + return downloadBlockBlobResponse.readableStreamBody as Readable; } public async readFileToBuffer(fileName: string) { diff --git a/src/controllers/csv-processor.ts b/src/controllers/csv-processor.ts index 0dbe380..c910b29 100644 --- a/src/controllers/csv-processor.ts +++ b/src/controllers/csv-processor.ts @@ -7,7 +7,7 @@ import { parse } from 'csv'; import { ENGLISH, WELSH, logger, t } from '../app'; import { DatasetDTO, ImportDTO } from '../dtos2/dataset-dto'; import { Error } from '../dtos2/error'; -import { ViewDTO, ViewErrDTO } from '../dtos2/view-dto'; +import { ViewStream, ViewDTO, ViewErrDTO } from '../dtos2/view-dto'; import { Dataset } from '../entity2/dataset'; import { Import } from '../entity2/import'; @@ -153,6 +153,12 @@ export const uploadCSVToBlobStorage = async (fileStream: Readable, filetype: str } }; +export const uploadCSVBufferToBlobStorage = async (fileBuffer: Buffer, filetype: string): Promise => { + const fileStream = Readable.from(fileBuffer); + const importRecord: Import = await uploadCSVToBlobStorage(fileStream, filetype); + return importRecord; +}; + function setupPagination(page: number, total_pages: number): Array { const pages = []; if (page !== 1) pages.push('previous'); @@ -198,8 +204,8 @@ async function processCSVData( return { success: true, - dataset: DatasetDTO.fromDatasetShallow(dataset), - import: ImportDTO.fromImport(importObj), + dataset: await DatasetDTO.fromDatasetShallow(dataset), + import: await ImportDTO.fromImport(importObj), current_page: page, page_info: { total_records: dataArray.length, @@ -214,6 +220,31 @@ async function processCSVData( }; } +export const getFileFromDataLake = async (dataset: Dataset, importObj: Import): Promise => { + const datalakeService = new DataLakeService(); + let stream: Readable; + try { + stream = await datalakeService.downloadFileStream(importObj.filename); + } catch (err) { + logger.error(err); + return { + success: false, + errors: [ + { + field: 'csv', + message: [ + { lang: ENGLISH, message: t('errors.download_from_datalake', { lng: ENGLISH }) }, + { lang: WELSH, message: t('errors.download_from_datalake', { lng: WELSH }) } + ], + tag: { name: 'errors.download_from_datalake', params: {} } + } + ], + dataset_id: dataset.id + }; + } + return { success: true, stream }; +}; + export const processCSVFromDatalake = async ( dataset: Dataset, importObj: Import, @@ -244,6 +275,34 @@ export const processCSVFromDatalake = async ( return processCSVData(buff, page, size, dataset, importObj); }; +export const getFileFromBlobStorage = async (dataset: Dataset, importObj: Import): Promise => { + const blobStoageService = new BlobStorageService(); + let stream: Readable; + try { + stream = await blobStoageService.getReadableStream(importObj.filename); + } catch (err) { + logger.error(err); + return { + success: false, + errors: [ + { + field: 'csv', + message: [ + { lang: ENGLISH, message: t('errors.download_from_blobstorage', { lng: ENGLISH }) }, + { lang: WELSH, message: t('errors.download_from_blobstorage', { lng: WELSH }) } + ], + tag: { name: 'errors.download_from_datalake', params: {} } + } + ], + dataset_id: dataset.id + }; + } + return { + success: true, + stream + }; +}; + export const processCSVFromBlobStorage = async ( dataset: Dataset, importObj: Import, diff --git a/src/controllers/datalake.ts b/src/controllers/datalake.ts 
index acdd344..935c402 100644 --- a/src/controllers/datalake.ts +++ b/src/controllers/datalake.ts @@ -1,4 +1,5 @@ import { basename } from 'path'; +import { Readable } from 'stream'; import * as dotenv from 'dotenv'; import { DataLakeServiceClient, StorageSharedKeyCredential } from '@azure/storage-file-datalake'; @@ -111,4 +112,13 @@ export class DataLakeService { return downloaded; } + + public async downloadFileStream(fileName: string) { + const fileSystemClient = this.serviceClient.getFileSystemClient(fileSystemName); + const directoryClient = fileSystemClient.getDirectoryClient(defaultDirectoryName); + const fileClient = directoryClient.getFileClient(fileName); + + const downloadResponse = await fileClient.read(); + return downloadResponse.readableStreamBody as Readable; + } } diff --git a/src/data-source.ts b/src/data-source.ts index 5664a81..3e9afb5 100644 --- a/src/data-source.ts +++ b/src/data-source.ts @@ -18,7 +18,7 @@ export const datasourceOptions: DataSourceOptions = { ssl: true, synchronize: false, logging: false, - entities: [`${__dirname}/entity/*.ts`], + entities: [`${__dirname}/entity2/*.ts`], migrations: [`${__dirname}/migration/*.ts`], subscribers: [] }; diff --git a/src/database-manager.ts b/src/database-manager.ts index af107fa..f4f7927 100644 --- a/src/database-manager.ts +++ b/src/database-manager.ts @@ -1,15 +1,16 @@ -/* eslint-disable import/no-cycle */ import 'reflect-metadata'; import { DataSource, DataSourceOptions, EntityManager } from 'typeorm'; import { Logger } from 'pino'; -import { Dataset } from './entity/dataset'; -import { Datafile } from './entity/datafile'; -import { LookupTable } from './entity/lookuptable'; -import { DatasetTitle } from './entity/dataset_title'; -import { DatasetColumn } from './entity/dataset_column'; -import { DatasetDescription } from './entity/dataset_description'; -import { ColumnTitle } from './entity/column_title'; +import { Dataset } from './entity2/dataset'; +import { DatasetInfo } from './entity2/dataset_info'; +import { Revision } from './entity2/revision'; +import { Import } from './entity2/import'; +import { CsvInfo } from './entity2/csv_info'; +import { Source } from './entity2/source'; +import { Dimension } from './entity2/dimension'; +import { DimensionInfo } from './entity2/dimension_info'; +import { Users } from './entity2/users'; class DatabaseManager { private datasourceOptions: DataSourceOptions; @@ -38,7 +39,7 @@ class DatabaseManager { async initializeDataSource() { this.dataSource = new DataSource({ ...this.datasourceOptions, - entities: [Dataset, Datafile, LookupTable, DatasetTitle, DatasetDescription, DatasetColumn, ColumnTitle] + entities: [Dataset, DatasetInfo, Revision, Import, CsvInfo, Source, Dimension, DimensionInfo, Users] }); await this.dataSource diff --git a/src/dtos2/dataset-dto.ts b/src/dtos2/dataset-dto.ts index 078d144..b7ff7bc 100644 --- a/src/dtos2/dataset-dto.ts +++ b/src/dtos2/dataset-dto.ts @@ -3,7 +3,7 @@ import { Dimension } from '../entity2/dimension'; import { DimensionInfo } from '../entity2/dimension_info'; import { Source } from '../entity2/source'; import { Import } from '../entity2/import'; -import { RevisionEntity } from '../entity2/revision'; +import { Revision } from '../entity2/revision'; import { DatasetInfo } from '../entity2/dataset_info'; export class DatasetInfoDTO { @@ -36,35 +36,36 @@ export class DimensionDTO { finish_revision_id?: string; validator?: string; sources?: SourceDTO[]; - dimensionInfos?: DimensionInfoDTO[]; + dimensionInfo?: DimensionInfoDTO[]; 
dataset_id?: string; - static fromDimension(dimension: Dimension): DimensionDTO { - const dto = new DimensionDTO(); - dto.id = dimension.id; - dto.type = dimension.type; - dto.start_revision_id = dimension.start_revision.id; - dto.finish_revision_id = dimension.finish_revision ? dimension.finish_revision.id : undefined; - dto.validator = dimension.validator; - dto.dimensionInfos = dimension.dimensionInfos.map((dimensionInfo: DimensionInfo) => { + static async fromDimension(dimension: Dimension): Promise { + const dimDto = new DimensionDTO(); + dimDto.id = dimension.id; + dimDto.type = dimension.type; + dimDto.start_revision_id = (await dimension.start_revision).id; + dimDto.finish_revision_id = (await dimension.finish_revision)?.id || ''; + dimDto.validator = dimension.validator; + dimDto.dimensionInfo = (await dimension.dimensionInfo).map((dimInfo: DimensionInfo) => { const infoDto = new DimensionInfoDTO(); - infoDto.language = dimensionInfo.language; - infoDto.name = dimensionInfo.name; - infoDto.description = dimensionInfo.description; - infoDto.notes = dimensionInfo.notes; + infoDto.language = dimInfo.language; + infoDto.name = dimInfo.name; + infoDto.description = dimInfo.description; + infoDto.notes = dimInfo.notes; return infoDto; }); - dto.sources = dimension.sources.map((source: Source) => { - const sourceDto = new SourceDTO(); - sourceDto.id = source.id; - sourceDto.import_id = source.import.id; - sourceDto.revision_id = source.revision.id; - sourceDto.csv_field = source.csv_field; - sourceDto.action = source.action; - return sourceDto; - }); - dto.dataset_id = dimension.dataset.id; - return dto; + dimDto.sources = await Promise.all( + (await dimension.sources).map(async (source: Source) => { + const sourceDto = new SourceDTO(); + sourceDto.id = source.id; + sourceDto.import_id = (await source.import).id; + sourceDto.revision_id = (await source.revision).id; + sourceDto.csv_field = source.csv_field; + sourceDto.action = source.action; + return sourceDto; + }) + ); + return dimDto; } } @@ -74,20 +75,33 @@ export class ImportDTO { mime_type: string; filename: string; hash: string; - uploaded_at: Date; + uploaded_at: string; type: string; location: string; + sources?: SourceDTO[]; - static fromImport(importEntity: Import): ImportDTO { + static async fromImport(importEntity: Import): Promise { const dto = new ImportDTO(); dto.id = importEntity.id; - dto.revision_id = importEntity.revision.id; + const revision = await importEntity.revision; + dto.revision_id = revision.id; dto.mime_type = importEntity.mime_type; dto.filename = importEntity.filename; dto.hash = importEntity.hash; - dto.uploaded_at = importEntity.uploaded_at; + dto.uploaded_at = importEntity.uploaded_at?.toISOString() || ''; dto.type = importEntity.type; dto.location = importEntity.location; + dto.sources = await Promise.all( + (await importEntity.sources).map(async (source: Source) => { + const sourceDto = new SourceDTO(); + sourceDto.id = source.id; + sourceDto.import_id = (await source.import).id; + sourceDto.revision_id = (await source.revision).id; + sourceDto.csv_field = source.csv_field; + sourceDto.action = source.action; + return sourceDto; + }) + ); return dto; } } @@ -95,241 +109,206 @@ export class ImportDTO { export class RevisionDTO { id: string; revision_index: number; - creation_date: Date; + creation_date: string; previous_revision_id?: string; online_cube_filename?: string; - publish_date?: Date; - approval_date?: Date; + publish_date?: string; + approval_date?: string; approved_by?: string; 
created_by: string; imports: ImportDTO[]; dataset_id?: string; - static fromRevision(revision: RevisionEntity): RevisionDTO { - const dto = new RevisionDTO(); - dto.id = revision.id; - dto.revision_index = revision.revision_index; - dto.dataset_id = revision.dataset.id; - dto.creation_date = revision.creation_date; - dto.previous_revision_id = revision.previous_revision ? revision.previous_revision.id : undefined; - dto.online_cube_filename = revision.online_cube_filename; - dto.publish_date = revision.publish_date; - dto.approval_date = revision.approval_date; - dto.approved_by = revision.approved_by ? revision.approved_by.name : undefined; - dto.created_by = revision.created_by.name; - dto.imports = revision.imports.map((importEntity: Import) => { - return ImportDTO.fromImport(importEntity); - }); - return dto; - } -} - -export class DatasetDTO { - id: string; - creation_date: Date; - created_by: string; - live?: Date; - archive?: Date; - dimensions?: DimensionDTO[]; - revisions?: RevisionDTO[]; - datasetInfos?: DatasetInfoDTO[]; - - static fromDatasetComplete(dataset: Dataset): DatasetDTO { - const dto = new DatasetDTO(); - dto.id = dataset.id; - dto.creation_date = dataset.creation_date; - dto.created_by = dataset.created_by.name; - dto.live = dataset.live; - dto.archive = dataset.archive; - dto.datasetInfos = dataset.datasetInfos.map((datasetInfo: DatasetInfo) => { - const infoDto = new DatasetInfoDTO(); - infoDto.language = datasetInfo.language; - infoDto.title = datasetInfo.title; - infoDto.description = datasetInfo.description; - return infoDto; - }); - dto.dimensions = dataset.dimensions.map((dimension: Dimension) => { - const dimDto = new DimensionDTO(); - dimDto.id = dimension.id; - dimDto.type = dimension.type; - dimDto.start_revision_id = dimension.start_revision.id; - dimDto.finish_revision_id = dimension.finish_revision.id; - dimDto.validator = dimension.validator; - dimDto.dimensionInfos = dimension.dimensionInfos.map((dimInfo: DimensionInfo) => { - const infoDto = new DimensionInfoDTO(); - infoDto.language = dimInfo.language; - infoDto.name = dimInfo.name; - infoDto.description = dimInfo.description; - infoDto.notes = dimInfo.notes; - return infoDto; - }); - dimDto.sources = dimension.sources.map((source: Source) => { - const sourceDto = new SourceDTO(); - sourceDto.id = source.id; - sourceDto.import_id = source.import.id; - sourceDto.revision_id = source.revision.id; - sourceDto.csv_field = source.csv_field; - sourceDto.action = source.action; - return sourceDto; - }); - return dimDto; - }); - dto.revisions = dataset.revisions.map((revision: RevisionEntity) => { - const revDto = new RevisionDTO(); - revDto.id = revision.id; - revDto.revision_index = revision.revision_index; - revDto.dataset_id = revision.dataset.id; - revDto.creation_date = revision.creation_date; - revDto.previous_revision_id = revision.previous_revision.id; - revDto.online_cube_filename = revision.online_cube_filename; - revDto.publish_date = revision.publish_date; - revDto.approval_date = revision.approval_date; - revDto.approved_by = revision.approved_by.name; - revDto.created_by = revision.created_by.name; - revDto.imports = revision.imports.map((imp: Import) => { + static async fromRevision(revision: Revision): Promise { + const revDto = new RevisionDTO(); + revDto.id = revision.id; + revDto.revision_index = revision.revision_index; + revDto.dataset_id = (await revision.dataset).id; + revDto.creation_date = revision.creation_date.toISOString(); + revDto.previous_revision_id = (await 
revision.previous_revision)?.id || ''; + revDto.online_cube_filename = revision.online_cube_filename; + revDto.publish_date = revision.publish_date?.toISOString() || ''; + revDto.approval_date = revision.approval_date?.toISOString() || ''; + revDto.approved_by = (await revision.approved_by)?.name || undefined; + revDto.created_by = (await revision.created_by).name; + revDto.imports = await Promise.all( + (await revision.imports).map(async (imp: Import) => { const impDto = new ImportDTO(); impDto.id = imp.id; - impDto.revision_id = imp.revision.id; + impDto.revision_id = (await imp.revision).id; impDto.mime_type = imp.mime_type; impDto.filename = imp.filename; impDto.hash = imp.hash; - impDto.uploaded_at = imp.uploaded_at; + impDto.uploaded_at = imp.uploaded_at.toISOString(); impDto.type = imp.type; impDto.location = imp.location; return impDto; - }); - return revDto; - }); - return dto; + }) + ); + return revDto; } +} + +export class DatasetDTO { + id: string; + creation_date: string; + created_by: string; + live?: string; + archive?: string; + dimensions?: DimensionDTO[]; + revisions?: RevisionDTO[]; + datasetInfo?: DatasetInfoDTO[]; - static fromDatasetWithDimensions(dataset: Dataset): DatasetDTO { + static async fromDatasetShallow(dataset: Dataset): Promise { const dto = new DatasetDTO(); dto.id = dataset.id; - dto.creation_date = dataset.creation_date; - dto.created_by = dataset.created_by.name; - dto.live = dataset.live; - dto.archive = dataset.archive; - dto.datasetInfos = dataset.datasetInfos.map((datasetInfo: DatasetInfo) => { + dto.creation_date = dataset.creation_date.toISOString(); + dto.created_by = (await dataset.created_by).name; + dto.live = dataset.live?.toISOString() || ''; + dto.archive = dataset.archive?.toISOString() || ''; + dto.datasetInfo = (await dataset.datasetInfo).map((datasetInfo: DatasetInfo) => { const infoDto = new DatasetInfoDTO(); infoDto.language = datasetInfo.language; infoDto.title = datasetInfo.title; infoDto.description = datasetInfo.description; return infoDto; }); - dto.dimensions = dataset.dimensions.map((dimension: Dimension) => { - const dimDto = new DimensionDTO(); - dimDto.id = dimension.id; - dimDto.type = dimension.type; - dimDto.start_revision_id = dimension.start_revision.id; - dimDto.finish_revision_id = dimension.finish_revision.id; - dimDto.validator = dimension.validator; - dimDto.sources = dimension.sources.map((source: Source) => { - const sourceDto = new SourceDTO(); - sourceDto.id = source.id; - sourceDto.import_id = source.import.id; - sourceDto.revision_id = source.revision.id; - sourceDto.csv_field = source.csv_field; - sourceDto.action = source.action; - return sourceDto; - }); - return dimDto; - }); + dto.dimensions = []; dto.revisions = []; return dto; } - static fromDatasetWithRevisions(dataset: Dataset): DatasetDTO { + static async fromDatasetComplete(dataset: Dataset): Promise { const dto = new DatasetDTO(); dto.id = dataset.id; - dto.creation_date = dataset.creation_date; - dto.created_by = dataset.created_by.name; - dto.live = dataset.live; - dto.archive = dataset.archive; - dto.datasetInfos = dataset.datasetInfos.map((datasetInfo: DatasetInfo) => { + dto.creation_date = dataset.creation_date.toISOString(); + dto.created_by = (await dataset.created_by).name; + dto.live = dataset.live?.toISOString() || ''; + dto.archive = dataset.archive?.toISOString() || ''; + dto.datasetInfo = (await dataset.datasetInfo).map((datasetInfo: DatasetInfo) => { const infoDto = new DatasetInfoDTO(); infoDto.language = datasetInfo.language; 
infoDto.title = datasetInfo.title; infoDto.description = datasetInfo.description; return infoDto; }); - dto.dimensions = []; - dto.revisions = dataset.revisions.map((revision: RevisionEntity) => { - const revDto = new RevisionDTO(); - revDto.id = revision.id; - revDto.revision_index = revision.revision_index; - revDto.dataset_id = revision.dataset.id; - revDto.creation_date = revision.creation_date; - revDto.previous_revision_id = revision.previous_revision.id; - revDto.online_cube_filename = revision.online_cube_filename; - revDto.publish_date = revision.publish_date; - revDto.approval_date = revision.approval_date; - revDto.approved_by = revision.approved_by.name; - revDto.created_by = revision.created_by.name; - revDto.imports = []; - return revDto; - }); + dto.dimensions = await Promise.all( + (await dataset.dimensions).map(async (dimension: Dimension) => { + const dimDto = new DimensionDTO(); + dimDto.id = dimension.id; + dimDto.type = dimension.type; + dimDto.start_revision_id = (await dimension.start_revision).id; + dimDto.finish_revision_id = (await dimension.finish_revision)?.id || undefined; + dimDto.validator = dimension.validator; + dimDto.dimensionInfo = (await dimension.dimensionInfo).map((dimInfo: DimensionInfo) => { + const infoDto = new DimensionInfoDTO(); + infoDto.language = dimInfo.language; + infoDto.name = dimInfo.name; + infoDto.description = dimInfo.description; + infoDto.notes = dimInfo.notes; + return infoDto; + }); + dimDto.sources = await Promise.all( + (await dimension.sources).map(async (source: Source) => { + const sourceDto = new SourceDTO(); + sourceDto.id = source.id; + sourceDto.import_id = (await source.import).id; + sourceDto.revision_id = (await source.revision).id; + sourceDto.csv_field = source.csv_field; + sourceDto.action = source.action; + return sourceDto; + }) + ); + return dimDto; + }) + ); + dto.revisions = await Promise.all( + (await dataset.revisions).map(async (revision: Revision) => { + const revDto = new RevisionDTO(); + revDto.id = revision.id; + revDto.revision_index = revision.revision_index; + revDto.dataset_id = (await revision.dataset).id; + revDto.creation_date = revision.creation_date.toISOString(); + revDto.previous_revision_id = (await revision.previous_revision)?.id || undefined; + revDto.online_cube_filename = revision.online_cube_filename; + revDto.publish_date = revision.publish_date?.toISOString() || ''; + revDto.approval_date = revision.approval_date?.toISOString() || ''; + revDto.approved_by = (await revision.approved_by)?.name || undefined; + revDto.created_by = (await revision.created_by)?.name; + revDto.imports = await Promise.all( + (await revision.imports).map(async (imp: Import) => { + const impDto = new ImportDTO(); + impDto.id = imp.id; + impDto.revision_id = (await imp.revision).id; + impDto.mime_type = imp.mime_type; + impDto.filename = imp.filename; + impDto.hash = imp.hash; + impDto.uploaded_at = imp.uploaded_at.toISOString(); + impDto.type = imp.type; + impDto.location = imp.location; + impDto.sources = await Promise.all( + (await imp.sources).map(async (source: Source) => { + const sourceDto = new SourceDTO(); + sourceDto.id = source.id; + sourceDto.import_id = (await source.import).id; + sourceDto.revision_id = (await source.revision).id; + sourceDto.csv_field = source.csv_field; + sourceDto.action = source.action; + return sourceDto; + }) + ); + return impDto; + }) + ); + return revDto; + }) + ); return dto; } - static fromDatasetWithShallowDimensionsAndRevisions(dataset: Dataset): DatasetDTO { + static 
async fromDatasetWithRevisions(dataset: Dataset): Promise { const dto = new DatasetDTO(); dto.id = dataset.id; - dto.creation_date = dataset.creation_date; - dto.created_by = dataset.created_by.name; - dto.live = dataset.live; - dto.archive = dataset.archive; - dto.datasetInfos = dataset.datasetInfos.map((datasetInfo: DatasetInfo) => { + dto.creation_date = dataset.creation_date.toISOString(); + dto.created_by = (await dataset.created_by).name; + dto.live = dataset.live?.toISOString() || ''; + dto.archive = dataset.archive?.toISOString() || ''; + dto.datasetInfo = (await dataset.datasetInfo).map((datasetInfo: DatasetInfo) => { const infoDto = new DatasetInfoDTO(); infoDto.language = datasetInfo.language; infoDto.title = datasetInfo.title; infoDto.description = datasetInfo.description; return infoDto; }); - dto.dimensions = dataset.dimensions.map((dimension: Dimension) => { - const dimDto = new DimensionDTO(); - dimDto.id = dimension.id; - dimDto.type = dimension.type; - dimDto.start_revision_id = dimension.start_revision.id; - dimDto.finish_revision_id = dimension.finish_revision.id; - dimDto.validator = dimension.validator; - dimDto.dimensionInfos = dimension.dimensionInfos.map((dimInfo: DimensionInfo) => { - const infoDto = new DimensionInfoDTO(); - infoDto.language = dimInfo.language; - infoDto.name = dimInfo.name; - infoDto.description = dimInfo.description; - infoDto.notes = dimInfo.notes; - return infoDto; - }); - dimDto.sources = []; - return dimDto; - }); - dto.revisions = dataset.revisions.map((revision: RevisionEntity) => { - const revDto = new RevisionDTO(); - revDto.id = revision.id; - revDto.revision_index = revision.revision_index; - revDto.dataset_id = revision.dataset.id; - revDto.creation_date = revision.creation_date; - revDto.previous_revision_id = revision.previous_revision.id; - revDto.online_cube_filename = revision.online_cube_filename; - revDto.publish_date = revision.publish_date; - revDto.approval_date = revision.approval_date; - revDto.approved_by = revision.approved_by.name; - revDto.created_by = revision.created_by.name; - revDto.imports = []; - return revDto; - }); + dto.dimensions = []; + dto.revisions = await Promise.all( + (await dataset.revisions).map(async (revision: Revision) => { + const revDto = new RevisionDTO(); + revDto.id = revision.id; + revDto.revision_index = revision.revision_index; + revDto.dataset_id = (await revision.dataset).id; + revDto.creation_date = revision.creation_date.toISOString(); + revDto.previous_revision_id = (await revision.previous_revision).id; + revDto.online_cube_filename = revision.online_cube_filename; + revDto.publish_date = revision.publish_date?.toISOString() || ''; + revDto.approval_date = revision.approval_date?.toISOString() || ''; + revDto.approved_by = (await revision.approved_by)?.name || ''; + revDto.created_by = (await revision.created_by)?.name || ''; + revDto.imports = []; + return revDto; + }) + ); return dto; } - static fromDatasetWithImports(dataset: Dataset): DatasetDTO { + static async fromDatasetWithRevisionsAndImports(dataset: Dataset): Promise { const dto = new DatasetDTO(); dto.id = dataset.id; - dto.creation_date = dataset.creation_date; - dto.created_by = dataset.created_by.name; - dto.live = dataset.live; - dto.archive = dataset.archive; - dto.datasetInfos = dataset.datasetInfos.map((datasetInfo: DatasetInfo) => { + dto.creation_date = dataset.creation_date.toISOString(); + dto.created_by = (await dataset.created_by).name; + dto.live = dataset.live?.toISOString() || ''; + dto.archive = 
dataset.archive?.toISOString() || ''; + dto.datasetInfo = (await dataset.datasetInfo).map((datasetInfo: DatasetInfo) => { const infoDto = new DatasetInfoDTO(); infoDto.language = datasetInfo.language; infoDto.title = datasetInfo.title; @@ -337,52 +316,88 @@ export class DatasetDTO { return infoDto; }); dto.dimensions = []; - dto.revisions = dataset.revisions.map((revision: RevisionEntity) => { - const revDto = new RevisionDTO(); - revDto.id = revision.id; - revDto.revision_index = revision.revision_index; - revDto.dataset_id = revision.dataset.id; - revDto.creation_date = revision.creation_date; - revDto.previous_revision_id = revision.previous_revision.id; - revDto.online_cube_filename = revision.online_cube_filename; - revDto.publish_date = revision.publish_date; - revDto.approval_date = revision.approval_date; - revDto.approved_by = revision.approved_by.name; - revDto.created_by = revision.created_by.name; - revDto.imports = revision.imports.map((imp: Import) => { - const impDto = new ImportDTO(); - impDto.id = imp.id; - impDto.revision_id = imp.revision.id; - impDto.mime_type = imp.mime_type; - impDto.filename = imp.filename; - impDto.hash = imp.hash; - impDto.uploaded_at = imp.uploaded_at; - impDto.type = imp.type; - impDto.location = imp.location; - return impDto; - }); - return revDto; - }); + dto.revisions = await Promise.all( + (await dataset.revisions).map(async (revision: Revision) => { + const revDto = new RevisionDTO(); + revDto.id = revision.id; + revDto.revision_index = revision.revision_index; + revDto.creation_date = revision.creation_date.toISOString(); + revDto.previous_revision_id = (await revision.previous_revision)?.id || undefined; + revDto.online_cube_filename = revision.online_cube_filename; + revDto.publish_date = revision.publish_date?.toISOString() || ''; + revDto.approval_date = revision.approval_date?.toISOString() || ''; + revDto.approved_by = (await revision.approved_by)?.name || undefined; + revDto.created_by = (await revision.created_by)?.name; + revDto.imports = await Promise.all( + (await revision.imports).map((imp: Import) => { + const impDto = new ImportDTO(); + impDto.id = imp.id; + impDto.mime_type = imp.mime_type; + impDto.filename = imp.filename; + impDto.hash = imp.hash; + impDto.uploaded_at = imp.uploaded_at.toISOString(); + impDto.type = imp.type; + impDto.location = imp.location; + return impDto; + }) + ); + return revDto; + }) + ); return dto; } - // Returns a very shallow DTO with only the dataset info - static fromDatasetShallow(dataset: Dataset): DatasetDTO { + static async fromDatasetWithShallowDimensionsAndRevisions(dataset: Dataset): Promise { const dto = new DatasetDTO(); dto.id = dataset.id; - dto.creation_date = dataset.creation_date; - dto.created_by = dataset.created_by.name; - dto.live = dataset.live; - dto.archive = dataset.archive; - dto.datasetInfos = dataset.datasetInfos.map((datasetInfo: DatasetInfo) => { + dto.creation_date = dataset.creation_date.toISOString(); + dto.created_by = (await dataset.created_by).name; + dto.live = dataset.live?.toISOString() || ''; + dto.archive = dataset.archive?.toISOString() || ''; + dto.datasetInfo = (await dataset.datasetInfo).map((datasetInfo: DatasetInfo) => { const infoDto = new DatasetInfoDTO(); infoDto.language = datasetInfo.language; infoDto.title = datasetInfo.title; infoDto.description = datasetInfo.description; return infoDto; }); - dto.dimensions = []; - dto.revisions = []; + dto.dimensions = await Promise.all( + (await dataset.dimensions).map(async (dimension: Dimension) => { + 
const dimDto = new DimensionDTO(); + dimDto.id = dimension.id; + dimDto.type = dimension.type; + dimDto.start_revision_id = (await dimension.start_revision).id; + dimDto.finish_revision_id = (await dimension.finish_revision)?.id || undefined; + dimDto.validator = dimension.validator; + dimDto.dimensionInfo = (await dimension.dimensionInfo).map((dimInfo: DimensionInfo) => { + const infoDto = new DimensionInfoDTO(); + infoDto.language = dimInfo.language; + infoDto.name = dimInfo.name; + infoDto.description = dimInfo.description; + infoDto.notes = dimInfo.notes; + return infoDto; + }); + dimDto.sources = []; // Sources are intentionally empty in this method as per original code + return dimDto; + }) + ); + dto.revisions = await Promise.all( + (await dataset.revisions).map(async (revision: Revision) => { + const revDto = new RevisionDTO(); + revDto.id = revision.id; + revDto.revision_index = revision.revision_index; + revDto.dataset_id = (await revision.dataset).id; + revDto.creation_date = revision.creation_date.toISOString(); + revDto.previous_revision_id = (await revision.previous_revision)?.id || undefined; + revDto.online_cube_filename = revision.online_cube_filename; + revDto.publish_date = revision.publish_date?.toISOString() || ''; + revDto.approval_date = revision.approval_date?.toISOString() || ''; + revDto.approved_by = (await revision.approved_by)?.name || ''; + revDto.created_by = (await revision.created_by)?.name || ''; + revDto.imports = []; // Imports are intentionally empty in this method as per original code + return revDto; + }) + ); return dto; } } diff --git a/src/dtos2/view-dto.ts b/src/dtos2/view-dto.ts index 8528200..22ed8bf 100644 --- a/src/dtos2/view-dto.ts +++ b/src/dtos2/view-dto.ts @@ -1,3 +1,5 @@ +import { Readable } from 'stream'; + import { Error } from './error'; import { DatasetDTO, ImportDTO } from './dataset-dto'; @@ -25,3 +27,8 @@ export interface ViewDTO { headers: Array | undefined; data: Array>; } + +export interface ViewStream { + success: boolean; + stream: Readable; +} diff --git a/src/entity/column_title.ts b/src/entity/column_title.ts deleted file mode 100644 index 249644d..0000000 --- a/src/entity/column_title.ts +++ /dev/null @@ -1,28 +0,0 @@ -import { Entity, Column, PrimaryColumn, BaseEntity, ManyToOne, JoinColumn } from 'typeorm'; - -// eslint-disable-next-line import/no-cycle -import { DatasetColumn } from './dataset_column'; - -@Entity({ name: 'column_title' }) -export class ColumnTitle extends BaseEntity { - @PrimaryColumn({ name: 'dataset_column_id' }) - datasetColumnId: string; - - @ManyToOne(() => DatasetColumn, (datasetColumn) => datasetColumn.title, { onDelete: 'CASCADE' }) - @JoinColumn({ name: 'dataset_column_id' }) - datasetColumn: DatasetColumn; - - @Column({ nullable: false }) - title: string; - - @PrimaryColumn({ name: 'language_code' }) - languageCode: string; - - public static createColumnFromString(column: DatasetColumn, title: string, language: string) { - const columnTitle = new ColumnTitle(); - columnTitle.title = title; - columnTitle.languageCode = language; - columnTitle.datasetColumn = column; - return columnTitle; - } -} diff --git a/src/entity/datafile.ts b/src/entity/datafile.ts deleted file mode 100644 index 2213c08..0000000 --- a/src/entity/datafile.ts +++ /dev/null @@ -1,36 +0,0 @@ -import { Entity, PrimaryGeneratedColumn, Column, CreateDateColumn, BaseEntity, ManyToOne, JoinColumn } from 'typeorm'; - -// eslint-disable-next-line import/no-cycle -import { Dataset } from './dataset'; - -@Entity({ name: 'datafiles' }) 
-export class Datafile extends BaseEntity { - @PrimaryGeneratedColumn('uuid') - id: string; - - @Column({ nullable: false }) - sha256hash: string; - - @Column({ name: 'draft', default: true }) - draft: boolean; - - @ManyToOne(() => Dataset, (dataset) => dataset.datafiles, { onDelete: 'CASCADE', eager: true }) - @JoinColumn({ name: 'dataset_id' }) - dataset: Dataset; - - @CreateDateColumn({ name: 'creation_date' }) - creationDate: Date; - - @Column({ name: 'created_by', nullable: true }) - createdBy: string; - - public static createDatafile(dataset: Dataset, hash: string, user: string): Datafile { - const datafile = new Datafile(); - datafile.dataset = dataset; - datafile.draft = true; - datafile.sha256hash = hash; - datafile.createdBy = user; - datafile.creationDate = new Date(Date.now()); - return datafile; - } -} diff --git a/src/entity/dataset.ts b/src/entity/dataset.ts deleted file mode 100644 index 360e824..0000000 --- a/src/entity/dataset.ts +++ /dev/null @@ -1,124 +0,0 @@ -/* eslint-disable import/no-cycle */ -import { UUID } from 'crypto'; - -import { Entity, PrimaryGeneratedColumn, Column, CreateDateColumn, BaseEntity, OneToMany, JoinColumn } from 'typeorm'; - -import { dbManager } from '../app'; - -import { LookupTable } from './lookuptable'; -import { Datafile } from './datafile'; -import { DatasetDescription } from './dataset_description'; -import { DatasetTitle } from './dataset_title'; -import { DatasetColumn } from './dataset_column'; - -@Entity({ name: 'datasets' }) -export class Dataset extends BaseEntity { - @PrimaryGeneratedColumn('uuid') - id: string; - - @Column({ name: 'internal_name', nullable: false }) - internalName: string; - - @CreateDateColumn({ name: 'creation_date' }) - creationDate: Date; - - @Column({ name: 'created_by', nullable: true }) - createdBy: string; - - @CreateDateColumn({ name: 'last_modified' }) - lastModified: Date; - - @Column({ name: 'modified_by', nullable: true }) - modifiedBy: string; - - @Column({ name: 'publish_date', nullable: true }) - publishData: Date; - - @Column({ name: 'published_by', nullable: true }) - publishedBy: string; - - @Column({ nullable: true }) - live: boolean; - - @Column({ nullable: true }) - code: string; - - @OneToMany(() => Datafile, (datafile) => datafile.dataset, { - cascade: true, - orphanedRowAction: 'delete' - }) - @JoinColumn() - datafiles: Promise; - - @OneToMany(() => LookupTable, (lookupTable) => lookupTable.dataset, { - cascade: true, - orphanedRowAction: 'delete' - }) - @JoinColumn() - lookuptables: Promise; - - @OneToMany(() => DatasetTitle, (datasetTitle) => datasetTitle.dataset, { - cascade: true, - orphanedRowAction: 'delete' - }) - @JoinColumn([{ referencedColumnName: 'dataset_id' }, { referencedColumnName: 'language' }]) - title: Promise; - - @OneToMany(() => DatasetDescription, (datasetDescription) => datasetDescription.dataset, { - cascade: true, - orphanedRowAction: 'delete' - }) - @JoinColumn([{ referencedColumnName: 'dataset_id' }, { referencedColumnName: 'language' }]) - description: Promise; - - @OneToMany(() => DatasetColumn, (datasetColumn) => datasetColumn.dataset, { - cascade: true, - orphanedRowAction: 'delete' - }) - @JoinColumn() - columns: Promise; - - public static createDataset(internalName: string, user?: string, id?: UUID): Dataset { - const dataset = new Dataset(); - if (id) dataset.id = id; - dataset.internalName = internalName; - if (user) { - dataset.createdBy = user; - dataset.modifiedBy = user; - } - dataset.live = false; - return dataset; - } - - public addCode(code: 
string) { - if (code.length > 12) { - throw new Error('Code is to long'); - } - this.code = code.toUpperCase(); - } - - public async addDatafile(file: Datafile) { - file.dataset = this; - await dbManager.getEntityManager().save(file); - } - - public addLookuptables(lookupTable: LookupTable) { - dbManager.getEntityManager().save(lookupTable); - } - - public addTitleByString(title: string, lang: string) { - dbManager.getEntityManager().save(DatasetTitle.datasetTitleFromString(this, title, lang)); - } - - public addTitle(title: DatasetTitle) { - dbManager.getEntityManager().save(title); - } - - public addDescriptionByString(description: string, lang: string) { - dbManager.getEntityManager().save(DatasetDescription.datasetDescriptionFromString(this, description, lang)); - } - - public addDescription(description: DatasetDescription) { - dbManager.getEntityManager().save(description); - } -} diff --git a/src/entity/dataset_column.ts b/src/entity/dataset_column.ts deleted file mode 100644 index fb561ba..0000000 --- a/src/entity/dataset_column.ts +++ /dev/null @@ -1,37 +0,0 @@ -/* eslint-disable import/no-cycle */ -import { Entity, PrimaryGeneratedColumn, Column, BaseEntity, OneToMany, ManyToOne, JoinColumn } from 'typeorm'; - -import { dbManager } from '../app'; - -import { Dataset } from './dataset'; -import { ColumnTitle } from './column_title'; - -@Entity({ name: 'dataset_column' }) -export class DatasetColumn extends BaseEntity { - @PrimaryGeneratedColumn('uuid') - id: string; - - @Column({ nullable: false }) - csvTitle: string; - - @Column({ nullable: true }) - type: string; - - @ManyToOne(() => Dataset, (dataset) => dataset.columns, { onDelete: 'CASCADE' }) - @JoinColumn({ name: 'dataset_id' }) - dataset: Dataset; - - @OneToMany(() => ColumnTitle, (columnTitle) => columnTitle.datasetColumn, { - cascade: true - }) - @JoinColumn([{ name: 'dataset_id' }, { name: 'language_code' }]) - title: Promise; - - public addTitleByString(title: string, languague: string) { - dbManager.getEntityManager().save(ColumnTitle.createColumnFromString(this, title, languague)); - } - - public addTitle(title: ColumnTitle) { - dbManager.getEntityManager().save(title); - } -} diff --git a/src/entity/dataset_description.ts b/src/entity/dataset_description.ts deleted file mode 100644 index 672ddbd..0000000 --- a/src/entity/dataset_description.ts +++ /dev/null @@ -1,33 +0,0 @@ -/* eslint-disable import/no-cycle */ -import { Entity, PrimaryColumn, Column, BaseEntity, ManyToOne, JoinColumn } from 'typeorm'; - -import { Dataset } from './dataset'; - -@Entity({ name: 'dataset_description' }) -export class DatasetDescription extends BaseEntity { - @PrimaryColumn({ name: 'dataset_id' }) - datasetID: string; - - @ManyToOne(() => Dataset, (dataset) => dataset.description, { onDelete: 'CASCADE' }) - @JoinColumn({ name: 'dataset_id' }) - dataset: Dataset; - - @Column({ nullable: false }) - description: string; - - @PrimaryColumn() - @Column({ name: 'language_code' }) - languageCode: string; - - public static datasetDescriptionFromString( - dataset: Dataset, - description: string, - language: string - ): DatasetDescription { - const datasetDescription = new DatasetDescription(); - datasetDescription.dataset = dataset; - datasetDescription.description = description; - datasetDescription.languageCode = language; - return datasetDescription; - } -} diff --git a/src/entity/dataset_title.ts b/src/entity/dataset_title.ts deleted file mode 100644 index 4f4ac65..0000000 --- a/src/entity/dataset_title.ts +++ /dev/null @@ -1,28 +0,0 @@ 
-import { Entity, PrimaryColumn, Column, BaseEntity, ManyToOne, JoinColumn } from 'typeorm'; - -// eslint-disable-next-line import/no-cycle -import { Dataset } from './dataset'; - -@Entity({ name: 'dataset_title' }) -export class DatasetTitle extends BaseEntity { - @PrimaryColumn({ name: 'dataset_id' }) - datasetId: string; - - @ManyToOne(() => Dataset, (dataset) => dataset.description, { onDelete: 'CASCADE' }) - @JoinColumn({ name: 'dataset_id' }) - dataset: Dataset; - - @Column({ nullable: false }) - title: string; - - @PrimaryColumn({ name: 'language_code' }) - languageCode: string; - - public static datasetTitleFromString(dataset: Dataset, title: string, language: string): DatasetTitle { - const datasetTitle = new DatasetTitle(); - datasetTitle.dataset = dataset; - datasetTitle.title = title; - datasetTitle.languageCode = language; - return datasetTitle; - } -} diff --git a/src/entity/lookuptable.ts b/src/entity/lookuptable.ts deleted file mode 100644 index 48c7ec8..0000000 --- a/src/entity/lookuptable.ts +++ /dev/null @@ -1,33 +0,0 @@ -/* eslint-disable import/no-cycle */ -import { Entity, PrimaryGeneratedColumn, Column, CreateDateColumn, BaseEntity, ManyToOne, JoinColumn } from 'typeorm'; - -import { Dataset } from './dataset'; -import { DatasetColumn } from './dataset_column'; - -@Entity({ name: 'lookuptable' }) -export class LookupTable extends BaseEntity { - @PrimaryGeneratedColumn('uuid') - id: string; - - @Column({ nullable: false }) - sha256hash: string; - - @ManyToOne(() => Dataset, (dataset) => dataset.datafiles, { onDelete: 'CASCADE' }) - @JoinColumn({ name: 'dataset_id' }) - dataset: Dataset; - - @CreateDateColumn({ name: 'creation_date' }) - creationDate: Date; - - @Column({ name: 'created_by', nullable: true }) - createdBy: string; - - @CreateDateColumn({ name: 'last_modified' }) - lastModified: Date; - - @Column({ name: 'modified_by', nullable: true }) - modifiedBy: string; - - @ManyToOne(() => DatasetColumn, (datasetColumn) => datasetColumn.id, { onDelete: 'CASCADE' }) - datasetColumn: DatasetColumn; -} diff --git a/src/entity2/csv_info.ts b/src/entity2/csv_info.ts index 73b4638..53b0d4a 100644 --- a/src/entity2/csv_info.ts +++ b/src/entity2/csv_info.ts @@ -1,23 +1,26 @@ -import { Entity, PrimaryGeneratedColumn, Column, BaseEntity, OneToOne, JoinColumn } from 'typeorm'; +import { Entity, PrimaryColumn, Column, BaseEntity, ManyToOne, JoinColumn } from 'typeorm'; // eslint-disable-next-line import/no-cycle import { Import } from './import'; @Entity() export class CsvInfo extends BaseEntity { - @PrimaryGeneratedColumn('uuid') - import_id: string; + @PrimaryColumn({ name: 'import_id', type: process.env.NODE_ENV === 'test' ? 
'text' : 'uuid' }) + id: string; - @Column({ type: 'char', length: 1 }) + @Column({ type: 'varchar', length: 1 }) delimiter: string; - @Column({ type: 'char', length: 1 }) + @Column({ type: 'varchar', length: 1 }) quote: string; @Column({ type: 'varchar', length: 2 }) linebreak: string; - @OneToOne(() => Import, (importEntity) => importEntity.csvInfo, { onDelete: 'CASCADE' }) + @ManyToOne(() => Import, (importEntity) => importEntity.csvInfo, { + onDelete: 'CASCADE', + orphanedRowAction: 'delete' + }) @JoinColumn({ name: 'import_id' }) - import: Import; + import: Promise; } diff --git a/src/entity2/dataset.ts b/src/entity2/dataset.ts index 3d35af8..5a8cbbd 100644 --- a/src/entity2/dataset.ts +++ b/src/entity2/dataset.ts @@ -1,8 +1,8 @@ import { Entity, PrimaryGeneratedColumn, Column, BaseEntity, ManyToOne, OneToMany, JoinColumn } from 'typeorm'; -import { User } from './user'; +import { Users } from './users'; // eslint-disable-next-line import/no-cycle -import { RevisionEntity } from './revision'; +import { Revision } from './revision'; // eslint-disable-next-line import/no-cycle import { DatasetInfo } from './dataset_info'; // eslint-disable-next-line import/no-cycle @@ -13,25 +13,31 @@ export class Dataset extends BaseEntity { @PrimaryGeneratedColumn('uuid') id: string; - @Column({ type: 'timestamp', default: () => 'CURRENT_TIMESTAMP' }) + @Column({ type: process.env.NODE_ENV === 'test' ? 'datetime' : 'timestamp', default: () => 'CURRENT_TIMESTAMP' }) creation_date: Date; - @ManyToOne(() => User) + @ManyToOne(() => Users) @JoinColumn({ name: 'created_by' }) - created_by: User; + created_by: Promise; - @Column({ type: 'timestamp', nullable: true }) + @Column({ type: process.env.NODE_ENV === 'test' ? 'datetime' : 'timestamp', nullable: true }) live: Date; - @Column({ type: 'timestamp', nullable: true }) + @Column({ type: process.env.NODE_ENV === 'test' ? 'datetime' : 'timestamp', nullable: true }) archive: Date; - @OneToMany(() => RevisionEntity, (revision) => revision.dataset) - revisions: RevisionEntity[]; + @OneToMany(() => DatasetInfo, (datasetInfo) => datasetInfo.dataset, { + cascade: true + }) + datasetInfo: Promise; - @OneToMany(() => DatasetInfo, (datasetInfo) => datasetInfo.dataset) - datasetInfos: DatasetInfo[]; + @OneToMany(() => Dimension, (dimension) => dimension.dataset, { + cascade: true + }) + dimensions: Promise; - @OneToMany(() => Dimension, (dimension) => dimension.dataset) - dimensions: Dimension[]; + @OneToMany(() => Revision, (revision) => revision.dataset, { + cascade: true + }) + revisions: Promise; } diff --git a/src/entity2/dataset_info.ts b/src/entity2/dataset_info.ts index a3b642f..fb45f4c 100644 --- a/src/entity2/dataset_info.ts +++ b/src/entity2/dataset_info.ts @@ -5,11 +5,10 @@ import { Dataset } from './dataset'; @Entity() export class DatasetInfo extends BaseEntity { - @PrimaryColumn({ name: 'dataset_id' }) + @PrimaryColumn({ name: 'dataset_id', type: process.env.NODE_ENV === 'test' ? 
'text' : 'uuid' }) id: string; - @PrimaryColumn({ name: 'language' }) - @Column({ type: 'varchar', length: 5, nullable: true }) + @PrimaryColumn({ name: 'language', type: 'varchar', length: 5 }) language: string; @Column({ type: 'text', nullable: true }) @@ -18,7 +17,10 @@ export class DatasetInfo extends BaseEntity { @Column({ type: 'text', nullable: true }) description: string; - @ManyToOne(() => Dataset, (dataset) => dataset.datasetInfos) + @ManyToOne(() => Dataset, (dataset) => dataset.datasetInfo, { + onDelete: 'CASCADE', + orphanedRowAction: 'delete' + }) @JoinColumn({ name: 'dataset_id' }) - dataset: Dataset; + dataset: Promise; } diff --git a/src/entity2/dimension.ts b/src/entity2/dimension.ts index ca6e6d1..b241508 100644 --- a/src/entity2/dimension.ts +++ b/src/entity2/dimension.ts @@ -3,38 +3,58 @@ import { Entity, PrimaryGeneratedColumn, Column, BaseEntity, ManyToOne, OneToMan // eslint-disable-next-line import/no-cycle import { Dataset } from './dataset'; // eslint-disable-next-line import/no-cycle -import { RevisionEntity } from './revision'; +import { Revision } from './revision'; // eslint-disable-next-line import/no-cycle import { DimensionInfo } from './dimension_info'; import { Source } from './source'; +import { DimensionType } from './dimension_types'; @Entity() export class Dimension extends BaseEntity { @PrimaryGeneratedColumn('uuid') id: string; - @ManyToOne(() => Dataset) + @ManyToOne(() => Dataset, { + onDelete: 'CASCADE', + orphanedRowAction: 'delete' + }) @JoinColumn({ name: 'dataset_id' }) - dataset: Dataset; + dataset: Promise; // Replace with actual enum types - @Column({ type: 'enum', enum: ['type1', 'type2'], nullable: false }) - type: string; - - @ManyToOne(() => RevisionEntity) + @Column({ + type: process.env.NODE_ENV === 'test' ? 'text' : 'enum', + enum: [ + DimensionType.RAW, + DimensionType.TEXT, + DimensionType.NUMERIC, + DimensionType.SYMBOL, + DimensionType.LOOKUP_TABLE, + DimensionType.TIME_PERIOD, + DimensionType.TIME_POINT + ], + nullable: false + }) + type: DimensionType; + + @ManyToOne(() => Revision) @JoinColumn({ name: 'start_revision_id' }) - start_revision: RevisionEntity; + start_revision: Promise; - @ManyToOne(() => RevisionEntity, { nullable: true }) + @ManyToOne(() => Revision, { nullable: true }) @JoinColumn({ name: 'finish_revision_id' }) - finish_revision: RevisionEntity; + finish_revision: Promise; @Column({ type: 'text', nullable: true }) validator: string; - @OneToMany(() => DimensionInfo, (dimensionInfo) => dimensionInfo.dimension) - dimensionInfos: DimensionInfo[]; + @OneToMany(() => DimensionInfo, (dimensionInfo) => dimensionInfo.dimension, { + cascade: true + }) + dimensionInfo: Promise; - @OneToMany(() => Source, (source) => source.dimension) - sources: Source[]; + @OneToMany(() => Source, (source) => source.dimension, { + cascade: true + }) + sources: Promise; } diff --git a/src/entity2/dimension_info.ts b/src/entity2/dimension_info.ts index 49fbe8b..4599db7 100644 --- a/src/entity2/dimension_info.ts +++ b/src/entity2/dimension_info.ts @@ -5,11 +5,10 @@ import { Dimension } from './dimension'; @Entity() export class DimensionInfo extends BaseEntity { - @PrimaryColumn({ name: 'dimension_id' }) + @PrimaryColumn({ name: 'dimension_id', type: process.env.NODE_ENV === 'test' ? 
'text' : 'uuid' }) id: string; - @PrimaryColumn({ name: 'language' }) - @Column({ type: 'varchar', length: 5, nullable: true }) + @PrimaryColumn({ name: 'language', type: 'varchar', length: 5 }) language: string; @Column({ type: 'text' }) @@ -21,7 +20,10 @@ export class DimensionInfo extends BaseEntity { @Column({ type: 'text', nullable: true }) notes: string; - @ManyToOne(() => Dimension, (dimension) => dimension.dimensionInfos) + @ManyToOne(() => Dimension, (dimension) => dimension.dimensionInfo, { + onDelete: 'CASCADE', + orphanedRowAction: 'delete' + }) @JoinColumn({ name: 'dimension_id' }) - dimension: Dimension; + dimension: Promise; } diff --git a/src/entity2/dimension_types.ts b/src/entity2/dimension_types.ts new file mode 100644 index 0000000..e70d655 --- /dev/null +++ b/src/entity2/dimension_types.ts @@ -0,0 +1,11 @@ +/* eslint-disable no-shadow */ +/* eslint-disable no-unused-vars */ +export enum DimensionType { + RAW = 'RAW', + TEXT = 'TEXT', + NUMERIC = 'NUMERIC', + SYMBOL = 'SYMBOL', + LOOKUP_TABLE = 'LOOKUP_TABLE', + TIME_PERIOD = 'TIME_PERIOD', + TIME_POINT = 'TIME_POINT' +} diff --git a/src/entity2/import.ts b/src/entity2/import.ts index bd97b78..93e8690 100644 --- a/src/entity2/import.ts +++ b/src/entity2/import.ts @@ -1,16 +1,7 @@ -import { - Entity, - PrimaryGeneratedColumn, - Column, - BaseEntity, - OneToOne, - ManyToOne, - OneToMany, - JoinColumn -} from 'typeorm'; +import { Entity, PrimaryGeneratedColumn, Column, BaseEntity, ManyToOne, OneToMany, JoinColumn } from 'typeorm'; // eslint-disable-next-line import/no-cycle -import { RevisionEntity } from './revision'; +import { Revision } from './revision'; // eslint-disable-next-line import/no-cycle import { CsvInfo } from './csv_info'; // eslint-disable-next-line import/no-cycle @@ -21,13 +12,18 @@ export class Import extends BaseEntity { @PrimaryGeneratedColumn('uuid') id: string; - @ManyToOne(() => RevisionEntity) + @ManyToOne(() => Revision, { + onDelete: 'CASCADE', + orphanedRowAction: 'delete' + }) @JoinColumn({ name: 'revision_id' }) - revision: RevisionEntity; + revision: Promise; - @OneToOne(() => CsvInfo, (csvInfo) => csvInfo.import, { onDelete: 'CASCADE' }) + @OneToMany(() => CsvInfo, (csvInfo) => csvInfo.import, { + cascade: true + }) @JoinColumn({ name: 'csv_info' }) - csvInfo: CsvInfo; + csvInfo: Promise; @Column({ type: 'varchar', length: 255 }) mime_type: string; @@ -38,15 +34,25 @@ export class Import extends BaseEntity { @Column({ type: 'varchar', length: 255 }) hash: string; - @Column({ type: 'timestamp', default: () => 'CURRENT_TIMESTAMP' }) + @Column({ type: process.env.NODE_ENV === 'test' ? 'datetime' : 'timestamp', default: () => 'CURRENT_TIMESTAMP' }) uploaded_at: Date; - @Column({ type: 'enum', enum: ['Draft', 'FactTable', 'LookupTable'], nullable: false }) + @Column({ + type: process.env.NODE_ENV === 'test' ? 'text' : 'enum', + enum: ['Draft', 'FactTable', 'LookupTable'], + nullable: false + }) type: string; - @Column({ type: 'enum', enum: ['BlobStorage', 'Datalake'], nullable: false }) + @Column({ + type: process.env.NODE_ENV === 'test' ? 
'text' : 'enum', + enum: ['BlobStorage', 'Datalake'], + nullable: false + }) location: string; - @OneToMany(() => Source, (source) => source.import) - sources: Source[]; + @OneToMany(() => Source, (source) => source.import, { + cascade: true + }) + sources: Promise; } diff --git a/src/entity2/revision.ts b/src/entity2/revision.ts index ae02120..1478294 100644 --- a/src/entity2/revision.ts +++ b/src/entity2/revision.ts @@ -2,60 +2,69 @@ import { Entity, PrimaryGeneratedColumn, Column, BaseEntity, JoinColumn, OneToMa // eslint-disable-next-line import/no-cycle import { Dataset } from './dataset'; -import { User } from './user'; +import { Users } from './users'; // eslint-disable-next-line import/no-cycle import { Import } from './import'; -interface Revision { +interface RevisionInterface { id: string; revision_index: number; - dataset: Dataset; + dataset: Promise; creation_date: Date; - previous_revision: Revision; + previous_revision: Promise; online_cube_filename: string; publish_date: Date; approval_date: Date; - approved_by: User; - created_by: User; - imports: Import[]; + approved_by: Promise; + created_by: Promise; + imports: Promise; } @Entity() -export class RevisionEntity extends BaseEntity implements Revision { +export class Revision extends BaseEntity implements RevisionInterface { @PrimaryGeneratedColumn('uuid') id: string; @Column({ type: 'int' }) revision_index: number; - @ManyToOne(() => Dataset, (dataset) => dataset.revisions) + @ManyToOne(() => Dataset, (dataset) => dataset.revisions, { + onDelete: 'CASCADE', + orphanedRowAction: 'delete' + }) @JoinColumn({ name: 'dataset_id' }) - dataset: Dataset; + dataset: Promise; - @Column({ type: 'timestamp', default: () => 'CURRENT_TIMESTAMP' }) + @Column({ type: process.env.NODE_ENV === 'test' ? 'datetime' : 'timestamp', default: () => 'CURRENT_TIMESTAMP' }) creation_date: Date; - @ManyToOne(() => RevisionEntity, { nullable: true }) + @ManyToOne(() => Revision, { + nullable: true, + onDelete: 'CASCADE', + orphanedRowAction: 'delete' + }) @JoinColumn({ name: 'previous_revision_id' }) - previous_revision: Revision; + previous_revision: Promise; @Column({ type: 'varchar', length: 255, nullable: true }) online_cube_filename: string; - @Column({ type: 'timestamp', nullable: true }) + @Column({ type: process.env.NODE_ENV === 'test' ? 'datetime' : 'timestamp', nullable: true }) publish_date: Date; - @Column({ type: 'timestamp', nullable: true }) + @Column({ type: process.env.NODE_ENV === 'test' ? 
'datetime' : 'timestamp', nullable: true }) approval_date: Date; - @OneToMany(() => Import, (importEntity) => importEntity.revision) - imports: Import[]; + @OneToMany(() => Import, (importEntity) => importEntity.revision, { + cascade: true + }) + imports: Promise; - @ManyToOne(() => User, { nullable: true }) + @ManyToOne(() => Users, { nullable: true }) @JoinColumn({ name: 'approved_by' }) - approved_by: User; + approved_by: Promise; - @ManyToOne(() => User) + @ManyToOne(() => Users) @JoinColumn({ name: 'created_by' }) - created_by: User; + created_by: Promise; } diff --git a/src/entity2/source.ts b/src/entity2/source.ts index 7c8a83c..8b381a7 100644 --- a/src/entity2/source.ts +++ b/src/entity2/source.ts @@ -4,34 +4,51 @@ import { Entity, PrimaryGeneratedColumn, Column, BaseEntity, ManyToOne, JoinColu import { Dimension } from './dimension'; // eslint-disable-next-line import/no-cycle import { Import } from './import'; -import { RevisionEntity } from './revision'; +import { Revision } from './revision'; @Entity() export class Source extends BaseEntity { @PrimaryGeneratedColumn('uuid') id: string; - @ManyToOne(() => Dimension) + @ManyToOne(() => Dimension, { + onDelete: 'CASCADE', + orphanedRowAction: 'delete' + }) @JoinColumn({ name: 'dimension_id' }) - dimension: Dimension; + dimension: Promise; - @ManyToOne(() => Import, (importEntity) => importEntity.sources, { nullable: false }) + @ManyToOne(() => Import, (importEntity) => importEntity.sources, { + nullable: false, + onDelete: 'CASCADE', + orphanedRowAction: 'delete' + }) @JoinColumn({ name: 'import_id' }) - import: Import; + import: Promise; - @ManyToOne(() => RevisionEntity) + @ManyToOne(() => Revision, { + onDelete: 'CASCADE', + orphanedRowAction: 'delete' + }) @JoinColumn({ name: 'revision_id' }) - revision: RevisionEntity; + revision: Promise; // Not implemented yet // @ManyToOne(() => LookupTableRevision) // @JoinColumn({ name: 'lookup_table_revision_id' }) // lookupTableRevision: LookupTableRevision; + @Column({ type: 'int', nullable: false }) + column_index: number; + @Column({ type: 'text' }) csv_field: string; // Replace with actual enum types - @Column({ type: 'enum', enum: ['action1', 'action2'], nullable: false }) + @Column({ + type: process.env.NODE_ENV === 'test' ? 'text' : 'enum', + enum: ['create', 'append', 'truncate-then-load', 'ignore'], + nullable: false + }) action: string; } diff --git a/src/entity2/user.ts b/src/entity2/users.ts similarity index 74% rename from src/entity2/user.ts rename to src/entity2/users.ts index 48482b1..7f61ded 100644 --- a/src/entity2/user.ts +++ b/src/entity2/users.ts @@ -1,7 +1,7 @@ import { Entity, PrimaryGeneratedColumn, Column, BaseEntity } from 'typeorm'; @Entity() -export class User extends BaseEntity { +export class Users extends BaseEntity { @PrimaryGeneratedColumn('uuid') id: string; @@ -23,7 +23,7 @@ export class User extends BaseEntity { @Column({ type: 'text', nullable: true }) id_token: string; - @Column({ type: 'timestamp', nullable: true }) + @Column({ type: process.env.NODE_ENV === 'test' ? 'datetime' : 'timestamp', nullable: true }) token_expiry: Date; @Column({ nullable: true }) @@ -38,17 +38,21 @@ export class User extends BaseEntity { @Column({ nullable: true }) profile_picture: string; - @Column({ type: 'timestamp', default: () => 'CURRENT_TIMESTAMP' }) + @Column({ type: process.env.NODE_ENV === 'test' ? 
'datetime' : 'timestamp', default: () => 'CURRENT_TIMESTAMP' }) created_at: Date; - @Column({ type: 'timestamp', default: () => 'CURRENT_TIMESTAMP', onUpdate: 'CURRENT_TIMESTAMP' }) + @Column({ + type: process.env.NODE_ENV === 'test' ? 'datetime' : 'timestamp', + default: () => 'CURRENT_TIMESTAMP', + onUpdate: 'CURRENT_TIMESTAMP' + }) updated_at: Date; @Column({ type: 'boolean', default: true }) active: boolean; - public static getTestUser(): User { - const user = new User(); + public static getTestUser(): Users { + const user = new Users(); user.id = '12345678-1234-1234-1234-123456789012'; user.email = 'test@test.com'; user.oidc_subject = ''; diff --git a/src/migration/1723729297617-migration.ts b/src/migration/1723729297617-migration.ts index 45c9bb0..5abd7d4 100644 --- a/src/migration/1723729297617-migration.ts +++ b/src/migration/1723729297617-migration.ts @@ -4,8 +4,7 @@ export class Migration1723729297617 implements MigrationInterface { public async up(queryRunner: QueryRunner): Promise { await queryRunner.query(` CREATE TABLE users ( - id UUID PRIMARY KEY, - username VARCHAR(255) NOT NULL UNIQUE, + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), email VARCHAR(255) NOT NULL UNIQUE, oidc_subject VARCHAR(255) UNIQUE, oidc_issuer VARCHAR(255), @@ -13,16 +12,17 @@ export class Migration1723729297617 implements MigrationInterface { refresh_token TEXT, id_token TEXT, token_expiry TIMESTAMP, - first_name VARCHAR(255), + name VARCHAR(255), + given_name VARCHAR(255), last_name VARCHAR(255), profile_picture VARCHAR(255), created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, active BOOLEAN NOT NULL DEFAULT true ); CREATE TABLE dataset ( - id UUID PRIMARY KEY, + id UUID DEFAULT gen_random_uuid() PRIMARY KEY, creation_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP, created_by UUID, live TIMESTAMP, @@ -31,26 +31,16 @@ export class Migration1723729297617 implements MigrationInterface { ); CREATE TABLE dataset_info ( - id UUID PRIMARY KEY, dataset_id UUID, language VARCHAR(5), title TEXT, description TEXT, + PRIMARY KEY (dataset_id, language), FOREIGN KEY (dataset_id) REFERENCES dataset(id) ON DELETE CASCADE ); - CREATE TABLE dimension_info ( - id UUID PRIMARY KEY, - dimension_id UUID, - language VARCHAR(5), - name TEXT, - description TEXT, - notes TEXT, - FOREIGN KEY (dimension_id) REFERENCES dimension(id) ON DELETE CASCADE - ); - CREATE TABLE revision ( - id UUID PRIMARY KEY, + id UUID DEFAULT gen_random_uuid() PRIMARY KEY, revision_index INT, dataset_id UUID, creation_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP, @@ -66,10 +56,12 @@ export class Migration1723729297617 implements MigrationInterface { FOREIGN KEY (created_by) REFERENCES users(id) ); + CREATE TYPE dimension_type AS ENUM ('RAW', 'TEXT', 'NUMERIC', 'SYMBOL', 'LOOKUP_TABLE', 'TIME_PERIOD', 'TIME_POINT'); + CREATE TABLE dimension ( - id UUID PRIMARY KEY, + id UUID DEFAULT gen_random_uuid() PRIMARY KEY, dataset_id UUID, - type VARCHAR(255) NOT NULL, + type dimension_type NOT NULL, start_revision_id UUID NOT NULL, finish_revision_id UUID, validator TEXT, @@ -78,6 +70,31 @@ export class Migration1723729297617 implements MigrationInterface { FOREIGN KEY (finish_revision_id) REFERENCES revision(id) ON DELETE SET NULL ); + CREATE TABLE dimension_info ( + dimension_id UUID, + language VARCHAR(5), + name TEXT, + description TEXT, + notes TEXT, + PRIMARY KEY (dimension_id, language), + FOREIGN KEY (dimension_id) REFERENCES dimension(id) 
ON DELETE CASCADE
+            );
+
+            CREATE TYPE import_type AS ENUM ('Draft', 'FactTable', 'LookupTable');
+            CREATE TYPE location_type AS ENUM ('BlobStorage', 'Datalake');
+
+            CREATE TABLE import (
+                id UUID DEFAULT gen_random_uuid() PRIMARY KEY,
+                revision_id UUID,
+                mime_type VARCHAR(255),
+                filename VARCHAR(255),
+                hash VARCHAR(255),
+                uploaded_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                type import_type NOT NULL,
+                location location_type NOT NULL,
+                FOREIGN KEY (revision_id) REFERENCES revision(id) ON DELETE CASCADE
+            );
+
             CREATE TABLE csv_info (
                 import_id UUID PRIMARY KEY,
                 delimiter CHAR(1),
@@ -86,28 +103,54 @@ export class Migration1723729297617 implements MigrationInterface {
                 FOREIGN KEY (import_id) REFERENCES import(id) ON DELETE CASCADE
             );
 
-            CREATE TABLE import (
-                id UUID PRIMARY KEY,
-                revision_id UUID,
-                csv_info UUID UNIQUE,
-                mime_type VARCHAR(255),
-                filename VARCHAR(255),
-                FOREIGN KEY (revision_id) REFERENCES revision(id) ON DELETE CASCADE,
-                FOREIGN KEY (csv_info) REFERENCES csv_info(import_id) ON DELETE CASCADE
-            );
+            CREATE TYPE source_action_type AS ENUM ('create', 'append', 'truncate-then-load', 'ignore');
 
             CREATE TABLE source (
-                id UUID PRIMARY KEY,
+                id UUID DEFAULT gen_random_uuid() PRIMARY KEY,
                 dimension_id UUID,
                 import_id UUID UNIQUE,
                 revision_id UUID,
-                lookup_table_revision_id UUID,
+                column_index INT,
                 csv_field TEXT,
-                action VARCHAR(255) NOT NULL,
+                action source_action_type NOT NULL,
                 FOREIGN KEY (dimension_id) REFERENCES dimension(id) ON DELETE CASCADE,
                 FOREIGN KEY (import_id) REFERENCES import(id) ON DELETE CASCADE,
-                FOREIGN KEY (revision_id) REFERENCES revision(id) ON DELETE CASCADE,
-                FOREIGN KEY (lookup_table_revision_id) REFERENCES revision(id) ON DELETE SET NULL
+                FOREIGN KEY (revision_id) REFERENCES revision(id) ON DELETE CASCADE
+            );
+
+            INSERT INTO users (
+                id,
+                email,
+                oidc_subject,
+                oidc_issuer,
+                access_token,
+                refresh_token,
+                id_token,
+                token_expiry,
+                name,
+                given_name,
+                last_name,
+                profile_picture,
+                created_at,
+                updated_at,
+                active
+            )
+            VALUES (
+                '12345678-1234-1234-1234-123456789012',
+                'test@test.com',
+                '',
+                'localAuth',
+                '',
+                '',
+                '',
+                NULL,
+                'Test User',
+                'Test',
+                'User',
+                '',
+                CURRENT_TIMESTAMP,
+                CURRENT_TIMESTAMP,
+                true
             );
         `);
     }
diff --git a/src/route/dataset-route.ts b/src/route/dataset-route.ts
index a5ff5e5..a5c1dc4 100644
--- a/src/route/dataset-route.ts
+++ b/src/route/dataset-route.ts
@@ -1,23 +1,28 @@
 /* eslint-disable no-warning-comments */
 /* eslint-disable import/no-cycle */
+import { randomUUID } from 'crypto';
+import { Readable } from 'stream';
+
 import { Request, Response, Router } from 'express';
 import multer from 'multer';
 import pino from 'pino';
 
-import { ViewErrDTO, ViewDTO } from '../dtos2/view-dto';
+import { ViewErrDTO, ViewDTO, ViewStream } from '../dtos2/view-dto';
 import { ENGLISH, WELSH, t } from '../app';
 import {
     processCSVFromDatalake,
     processCSVFromBlobStorage,
-    uploadCSVToBlobStorage,
-    DEFAULT_PAGE_SIZE
+    uploadCSVBufferToBlobStorage,
+    DEFAULT_PAGE_SIZE,
+    getFileFromBlobStorage,
+    getFileFromDataLake
 } from '../controllers/csv-processor';
+import { Users } from '../entity2/users';
 import { Dataset } from '../entity2/dataset';
 import { DatasetInfo } from '../entity2/dataset_info';
 import { Dimension } from '../entity2/dimension';
-import { RevisionEntity } from '../entity2/revision';
+import { Revision } from '../entity2/revision';
 import { Import } from '../entity2/import';
-import { User } from '../entity2/user';
 import { DatasetTitle,
FileDescription } from '../dtos2/filelist'; import { DatasetDTO, DimensionDTO, RevisionDTO } from '../dtos2/dataset-dto'; @@ -76,9 +81,9 @@ async function validateDimension(dimensionID: string, res: Response): Promise { +async function validateRevision(revisionID: string, res: Response): Promise { if (!validateIds(revisionID, REVISION, res)) return null; - const revision = await RevisionEntity.findOneBy({ id: revisionID }); + const revision = await Revision.findOneBy({ id: revisionID }); if (!revision) { res.status(404); res.json({ message: 'Revision not found.' }); @@ -147,7 +152,7 @@ apiRoute.post('/', upload.single('csv'), async (req: Request, res: Response) => } let importRecord: Import; try { - importRecord = await uploadCSVToBlobStorage(req.file?.stream, req.file?.mimetype); + importRecord = await uploadCSVBufferToBlobStorage(req.file.buffer, req.file?.mimetype); } catch (err) { logger.error(`An error occured trying to upload the file with the following error: ${err}`); res.status(500); @@ -157,25 +162,31 @@ apiRoute.post('/', upload.single('csv'), async (req: Request, res: Response) => // Everything looks good so far, let's create the dataset and revision records const dataset = new Dataset(); + dataset.id = randomUUID(); dataset.creation_date = new Date(); + const user = await Users.findOneBy({ id: Users.getTestUser().id }); + if (user === null) { + throw new Error('Test user not found'); + } // TODO change how we handle authentication to get the user on the Backend // We are using a stub test user for all requests at the moment - dataset.created_by = User.getTestUser(); + dataset.created_by = Promise.resolve(user); const datasetInfo = new DatasetInfo(); datasetInfo.language = lang; datasetInfo.title = title; - datasetInfo.dataset = dataset; - dataset.datasetInfos = [datasetInfo]; - const revision = new RevisionEntity(); - revision.dataset = dataset; + datasetInfo.dataset = Promise.resolve(dataset); + dataset.datasetInfo = Promise.resolve([datasetInfo]); + const revision = new Revision(); + revision.dataset = Promise.resolve(dataset); revision.revision_index = 1; revision.creation_date = new Date(); - // TODO change how we handle authentication to get the user on the Backend - revision.created_by = User.getTestUser(); - importRecord.revision = revision; - revision.imports = [importRecord]; - const savedDataset = await dataset.save(); - const uploadDTO = DatasetDTO.fromDatasetWithImports(savedDataset); + revision.created_by = Promise.resolve(user); + dataset.revisions = Promise.resolve([revision]); + importRecord.revision = Promise.resolve(revision); + revision.imports = Promise.resolve([importRecord]); + await dataset.save(); + const uploadDTO = await DatasetDTO.fromDatasetWithRevisionsAndImports(dataset); + res.status(201); res.json(uploadDTO); }); @@ -188,10 +199,11 @@ apiRoute.get('/', async (req: Request, res: Response) => { const fileList: FileDescription[] = []; for (const dataset of datasets) { const titles: DatasetTitle[] = []; - for (const datasetInfo of dataset.datasetInfos) { + const datasetInfo = await dataset.datasetInfo; + for (const info of datasetInfo) { titles.push({ - title: datasetInfo.title, - language: datasetInfo.language + title: info.title, + language: info.language }); } fileList.push({ @@ -208,8 +220,8 @@ apiRoute.get('/', async (req: Request, res: Response) => { apiRoute.get('/:dataset_id', async (req: Request, res: Response) => { const datasetID: string = req.params.dataset_id; const dataset = await validateDataset(datasetID, res); - if (dataset) return; 
- const dto = DatasetDTO.fromDatasetWithShallowDimensionsAndRevisions(dataset); + if (!dataset) return; + const dto = await DatasetDTO.fromDatasetComplete(dataset); res.json(dto); }); @@ -218,11 +230,11 @@ apiRoute.get('/:dataset_id', async (req: Request, res: Response) => { apiRoute.get('/:dataset_id/dimension/by-id/:dimension_id', async (req: Request, res: Response) => { const datasetID: string = req.params.dataset_id; const dataset = await validateDataset(datasetID, res); - if (dataset) return; + if (!dataset) return; const dimensionID: string = req.params.dimension_id; const dimension = await validateDimension(dimensionID, res); if (!dimension) return; - const dto = DimensionDTO.fromDimension(dimension); + const dto = await DimensionDTO.fromDimension(dimension); res.json(dto); }); @@ -231,18 +243,18 @@ apiRoute.get('/:dataset_id/dimension/by-id/:dimension_id', async (req: Request, apiRoute.get('/:dataset_id/revision/by-id/:revision_id', async (req: Request, res: Response) => { const datasetID: string = req.params.dataset_id; const dataset = await validateDataset(datasetID, res); - if (dataset) return; + if (!dataset) return; const revisionID: string = req.params.revision_id; const revision = await validateRevision(revisionID, res); if (!revision) return; - const dto = RevisionDTO.fromRevision(revision); + const dto = await RevisionDTO.fromRevision(revision); res.json(dto); }); // GET /api/dataset/:dataset_id/revision/id/:revision_id/import/id/:import_id/preview // Returns a view of the data file attached to the import apiRoute.get( - '/:dataset/revision/by-id/:revision_id/import/by-id/:import_id/preview', + '/:dataset_id/revision/by-id/:revision_id/import/by-id/:import_id/preview', async (req: Request, res: Response) => { const datasetID: string = req.params.dataset_id; const dataset = await validateDataset(datasetID, res); @@ -263,17 +275,63 @@ apiRoute.get( } else if (importRecord.location === 'Datalake') { processedCSV = await processCSVFromDatalake(dataset, importRecord, page_number, page_size); } else { - res.status(500); + res.status(400); res.json({ message: 'Import location not supported.' }); return; } if (!processedCSV.success) { - res.status(500); + res.status(400); } res.json(processedCSV); } ); +// GET /api/dataset/:dataset_id/revision/id/:revision_id/import/id/:import_id/raw +// Returns the original uploaded file back to the client +apiRoute.get( + '/:dataset_id/revision/by-id/:revision_id/import/by-id/:import_id/raw', + async (req: Request, res: Response) => { + const datasetID: string = req.params.dataset_id; + const dataset = await validateDataset(datasetID, res); + if (!dataset) return; + const revisionID: string = req.params.revision_id; + const revision = await validateRevision(revisionID, res); + if (!revision) return; + const importID: string = req.params.import_id; + const importRecord = await validateImport(importID, res); + if (!importRecord) return; + let viewStream: ViewErrDTO | ViewStream; + if (importRecord.location === 'BlobStorage') { + viewStream = await getFileFromBlobStorage(dataset, importRecord); + } else if (importRecord.location === 'Datalake') { + viewStream = await getFileFromDataLake(dataset, importRecord); + } else { + res.status(400); + res.json({ message: 'Import location not supported.' 
}); + return; + } + if (!viewStream.success) { + res.status(400); + res.json(viewStream); + return; + } + const readable: Readable = (viewStream as ViewStream).stream; + readable.pipe(res); + + // Handle errors in the file stream + readable.on('error', (err) => { + console.error('File stream error:', err); + res.writeHead(500, { 'Content-Type': 'text/plain' }); + res.end('Server Error'); + }); + + // Optionally listen for the end of the stream + readable.on('end', () => { + console.log('File stream ended'); + }); + } +); + // apiRoute.get('/:dataset/view', async (req: Request, res: Response) => { // const datasetID = req.params.dataset; // if (!checkDatasetID(datasetID, res)) return; diff --git a/src/server.ts b/src/server.ts index dccce65..3edbee3 100644 --- a/src/server.ts +++ b/src/server.ts @@ -1,14 +1,14 @@ import dotenv from 'dotenv'; import 'reflect-metadata'; -import app, { connectToDb } from './app'; +import app, { databaseManager } from './app'; import { datasourceOptions } from './data-source'; dotenv.config(); const PORT = process.env.BACKEND_PORT || 3000; -connectToDb(datasourceOptions); +databaseManager(datasourceOptions); app.listen(PORT, () => { console.log(`Server is running on port ${PORT}`); diff --git a/test/dataset.test.ts b/test/dataset.test.ts index 7db7d19..79aeeb7 100644 --- a/test/dataset.test.ts +++ b/test/dataset.test.ts @@ -6,10 +6,18 @@ import request from 'supertest'; import { DataLakeService } from '../src/controllers/datalake'; import { BlobStorageService } from '../src/controllers/blob-storage'; -import app, { ENGLISH, WELSH, t, dbManager, connectToDb } from '../src/app'; -import { Dataset } from '../src/entity/dataset'; -import { Datafile } from '../src/entity/datafile'; -import { DatasetDTO } from '../src/dtos2/dataset-dto'; +import app, { ENGLISH, WELSH, t, dbManager, databaseManager } from '../src/app'; +import { Dataset } from '../src/entity2/dataset'; +import { DatasetInfo } from '../src/entity2/dataset_info'; +import { Revision } from '../src/entity2/revision'; +import { Import } from '../src/entity2/import'; +import { CsvInfo } from '../src/entity2/csv_info'; +import { Source } from '../src/entity2/source'; +import { Dimension } from '../src/entity2/dimension'; +import { DimensionType } from '../src/entity2/dimension_types'; +import { DimensionInfo } from '../src/entity2/dimension_info'; +import { Users } from '../src/entity2/users'; +import { DatasetDTO, DimensionDTO, RevisionDTO } from '../src/dtos2/dataset-dto'; import { ViewErrDTO } from '../src/dtos2/view-dto'; import { MAX_PAGE_SIZE, MIN_PAGE_SIZE } from '../src/controllers/csv-processor'; @@ -23,32 +31,165 @@ BlobStorageService.prototype.uploadFile = jest.fn(); DataLakeService.prototype.uploadFile = jest.fn(); +const dataset1Id = 'BDC40218-AF89-424B-B86E-D21710BC92F1'; +const revision1Id = '85F0E416-8BD1-4946-9E2C-1C958897C6EF'; +const import1Id = 'FA07BE9D-3495-432D-8C1F-D0FC6DAAE359'; +const dimension1Id = '2D7ACD0B-A46A-43F7-8A88-224CE97FC8B9'; + describe('API Endpoints', () => { beforeAll(async () => { - await connectToDb(datasourceOptions); + await databaseManager(datasourceOptions); await dbManager.initializeDataSource(); - - const dataset1 = Dataset.createDataset('Test Data 1', 'test', 'bdc40218-af89-424b-b86e-d21710bc92f1'); - dataset1.live = true; - dataset1.code = 'tst0001'; + const user = Users.getTestUser(); + await user.save(); + // First create a dataset + const dataset1 = new Dataset(); + dataset1.id = dataset1Id; + dataset1.created_by = Promise.resolve(user); + dataset1.live 
= new Date(Date.now());
+        // Give it some info
+        const datasetInfo1 = new DatasetInfo();
+        datasetInfo1.dataset = Promise.resolve(dataset1);
+        datasetInfo1.title = 'Test Dataset 1';
+        datasetInfo1.description = 'I am the first test dataset';
+        datasetInfo1.language = 'en-GB';
+        dataset1.datasetInfo = Promise.resolve([datasetInfo1]);
+        // At the same time we also always create a first revision
+        const revision1 = new Revision();
+        revision1.id = revision1Id;
+        revision1.dataset = Promise.resolve(dataset1);
+        revision1.created_by = Promise.resolve(user);
+        revision1.revision_index = 1;
+        dataset1.revisions = Promise.resolve([revision1]);
+        // Attach an import e.g. a file to the revision
+        const import1 = new Import();
+        import1.revision = Promise.resolve(revision1);
+        import1.id = import1Id;
+        import1.filename = 'FA07BE9D-3495-432D-8C1F-D0FC6DAAE359.csv';
+        const testFile1 = path.resolve(__dirname, `./test-data-2.csv`);
+        const testFile2Buffer = fs.readFileSync(testFile1);
+        import1.hash = createHash('sha256').update(testFile2Buffer).digest('hex');
+        // First is a draft import and a first upload so everything is in blob storage
+        import1.location = 'BlobStorage';
+        import1.type = 'Draft';
+        import1.mime_type = 'text/csv';
+        // It's a CSV file so we need to know how to parse it
+        const csvInfo1 = new CsvInfo();
+        csvInfo1.import = Promise.resolve(import1);
+        csvInfo1.delimiter = ',';
+        csvInfo1.quote = '"';
+        csvInfo1.linebreak = '\n';
+        import1.csvInfo = Promise.resolve([csvInfo1]);
+        revision1.imports = Promise.resolve([import1]);
+        await dataset1.save();
+        // Create some sources for each of the columns in the CSV
+        const sources: Source[] = [];
+        const source1 = new Source();
+        source1.id = '304574E6-8DD0-4654-BE67-FA055C9F7C81';
+        source1.import = Promise.resolve(import1);
+        source1.revision = Promise.resolve(revision1);
+        source1.csv_field = 'ID';
+        source1.column_index = 0;
+        source1.action = 'ignore';
+        sources.push(source1);
+        const source2 = new Source();
+        source2.id = 'D3D3D3D3-8DD0-4654-BE67-FA055C9F7C81';
+        source2.import = Promise.resolve(import1);
+        source2.revision = Promise.resolve(revision1);
+        source2.csv_field = 'Text';
+        source2.column_index = 1;
+        source2.action = 'create';
+        sources.push(source2);
+        const source3 = new Source();
+        source3.id = 'D62FA390-9AB2-496E-A6CA-0C0E2FCF206E';
+        source3.import = Promise.resolve(import1);
+        source3.revision = Promise.resolve(revision1);
+        source3.csv_field = 'Number';
+        source3.column_index = 2;
+        source3.action = 'create';
+        sources.push(source3);
+        const source4 = new Source();
+        source4.id = 'FB25D668-54F2-44EF-99FE-B4EDC4AF2911';
+        source4.import = Promise.resolve(import1);
+        source4.revision = Promise.resolve(revision1);
+        source4.csv_field = 'Date';
+        source4.column_index = 3;
+        source4.action = 'create';
+        sources.push(source4);
+        import1.sources = Promise.resolve(sources);
+        await import1.save();
+        // Next create some dimensions
+        const dimensions: Dimension[] = [];
+        const dimension1 = new Dimension();
+        dimension1.id = dimension1Id;
+        dimension1.dataset = Promise.resolve(dataset1);
+        dimension1.start_revision = Promise.resolve(revision1);
+        dimension1.type = DimensionType.RAW;
+        const dimension1Info = new DimensionInfo();
+        dimension1Info.dimension = Promise.resolve(dimension1);
+        dimension1Info.name = 'ID';
+        dimension1Info.description = 'Unique identifier';
+        dimension1Info.language = 'en-GB';
+        dimension1.dimensionInfo = Promise.resolve([dimension1Info]);
+        dimension1.sources = Promise.resolve([source1]);
+
source1.dimension = Promise.resolve(dimension1); + dimensions.push(dimension1); + // Dimension 2 + const dimension2 = new Dimension(); + dimension2.id = '61D51F82-0771-4C90-849E-55FFA7A4D802'; + dimension2.dataset = Promise.resolve(dataset1); + dimension2.start_revision = Promise.resolve(revision1); + dimension2.type = DimensionType.TEXT; + const dimension2Info = new DimensionInfo(); + dimension2Info.dimension = Promise.resolve(dimension2); + dimension2Info.name = 'Text'; + dimension2Info.description = 'Sample text strings'; + dimension2Info.language = 'en-GB'; + dimension2.dimensionInfo = Promise.resolve([dimension2Info]); + dimension2.sources = Promise.resolve([source2]); + source2.dimension = Promise.resolve(dimension2); + dimensions.push(dimension2); + // Dimension 3 + const dimension3 = new Dimension(); + dimension3.id = 'F4D5B0F4-180E-4020-AAD5-9300B673D92B'; + dimension3.dataset = Promise.resolve(dataset1); + dimension3.start_revision = Promise.resolve(revision1); + dimension3.type = DimensionType.NUMERIC; + const dimension3Info = new DimensionInfo(); + dimension3Info.dimension = Promise.resolve(dimension3); + dimension3Info.name = 'Value'; + dimension3Info.description = 'Sample numeric values'; + dimension3Info.language = 'en-GB'; + dimension3.dimensionInfo = Promise.resolve([dimension3Info]); + dimension3.sources = Promise.resolve([source3]); + source3.dimension = Promise.resolve(dimension3); + dimensions.push(dimension3); + // Dimension 4 + const dimension4 = new Dimension(); + dimension4.id = 'C24962F4-F395-40EF-B4DD-270E90E10972'; + dimension4.dataset = Promise.resolve(dataset1); + dimension4.start_revision = Promise.resolve(revision1); + dimension4.type = DimensionType.TIME_POINT; + const dimension4Info = new DimensionInfo(); + dimension4Info.dimension = Promise.resolve(dimension4); + dimension4Info.name = 'Date'; + dimension4Info.description = 'Sample date values'; + dimension4Info.language = 'en-GB'; + dimension4.dimensionInfo = Promise.resolve([dimension4Info]); + dimension4.sources = Promise.resolve([source4]); + source4.dimension = Promise.resolve(dimension4); + dimensions.push(dimension4); + dataset1.dimensions = Promise.resolve(dimensions); await dataset1.save(); - dataset1.addTitleByString('Test Dataset 1', 'EN'); - dataset1.addDescriptionByString('I am the first test dataset', 'EN'); + }); - const dataset2 = Dataset.createDataset('Test Data 2', 'test', 'fa07be9d-3495-432d-8c1f-d0fc6daae359'); - dataset2.live = true; - dataset2.code = 'tst0002'; - dataset2.createdBy = 'test'; - await dataset2.save(); - const datafile2 = new Datafile(); - const testFile2 = path.resolve(__dirname, `./test-data-2.csv`); - const testFile2Buffer = fs.readFileSync(testFile2); - datafile2.sha256hash = createHash('sha256').update(testFile2Buffer).digest('hex'); - datafile2.createdBy = 'test'; - datafile2.draft = false; - await dataset2.addDatafile(datafile2); - dataset2.addTitleByString('Test Dataset 2', 'EN'); - dataset2.addDescriptionByString('I am the second test dataset', 'EN'); + test('Return true test', async () => { + const dataset1 = await Dataset.findOneBy({ id: dataset1Id }); + if (!dataset1) { + throw new Error('Dataset not found'); + } + const dto = await DatasetDTO.fromDatasetComplete(dataset1); + expect(dto).toBe(dto); }); test('Upload returns 400 if no file attached', async () => { @@ -110,7 +251,7 @@ describe('API Endpoints', () => { expect(res.body).toEqual(err); }); - test('Upload returns 200 if a file is attached', async () => { + test('Upload returns 201 if a file is 
attached', async () => { const csvfile = path.resolve(__dirname, `./test-data-1.csv`); const res = await request(app) @@ -118,18 +259,16 @@ describe('API Endpoints', () => { .attach('csv', csvfile) .field('title', 'Test Dataset 3') .field('lang', 'en-GB'); - const dataset = await Dataset.findOneBy({ internalName: 'Test Dataset 3' }); - if (!dataset) { - expect(dataset).not.toBeNull(); + const datasetInfo = await DatasetInfo.findOneBy({ title: 'Test Dataset 3' }); + if (!datasetInfo) { + expect(datasetInfo).not.toBeNull(); return; } - const datasetDTO = await datasetToDatasetDTO(dataset); - expect(res.status).toBe(200); - expect(res.body).toEqual({ - success: true, - dataset: datasetDTO - }); - await dataset.remove(); + const dataset = await datasetInfo.dataset; + const datasetDTO = await DatasetDTO.fromDatasetWithRevisionsAndImports(dataset); + expect(res.status).toBe(201); + expect(res.body).toEqual(datasetDTO); + await Dataset.remove(dataset); }); test('Get a filelist list returns 200 with a file list', async () => { @@ -138,28 +277,57 @@ describe('API Endpoints', () => { expect(res.body).toEqual({ filelist: [ { - internal_name: 'Test Data 1', - id: 'bdc40218-af89-424b-b86e-d21710bc92f1' - }, - { - internal_name: 'Test Data 2', - id: 'fa07be9d-3495-432d-8c1f-d0fc6daae359' + titles: [{ language: 'en-GB', title: 'Test Dataset 1' }], + dataset_id: 'BDC40218-AF89-424B-B86E-D21710BC92F1' } ] }); }); + test('Get a dataset returns 200 with a shallow object', async () => { + const dataset1 = await Dataset.findOneBy({ id: dataset1Id }); + if (!dataset1) { + throw new Error('Dataset not found'); + } + const dto = await DatasetDTO.fromDatasetComplete(dataset1); + const res = await request(app).get(`/en-GB/dataset/${dataset1Id}`); + expect(res.status).toBe(200); + expect(res.body).toEqual(dto); + }); + + test('Get a dimension returns 200 with a shallow object', async () => { + const dimension = await Dimension.findOneBy({ id: dimension1Id }); + if (!dimension) { + throw new Error('Dataset not found'); + } + const dto = await DimensionDTO.fromDimension(dimension); + const res = await request(app).get(`/en-GB/dataset/${dataset1Id}/dimension/by-id/${dimension1Id}`); + expect(res.status).toBe(200); + expect(res.body).toEqual(dto); + }); + + test('Get a revision returns 200 with a shallow object', async () => { + const revision = await Revision.findOneBy({ id: revision1Id }); + if (!revision) { + throw new Error('Dataset not found'); + } + const dto = await RevisionDTO.fromRevision(revision); + const res = await request(app).get(`/en-GB/dataset/${dataset1Id}/revision/by-id/${revision1Id}`); + expect(res.status).toBe(200); + expect(res.body).toEqual(dto); + }); + test('Get file view returns 400 if page_number is too high', async () => { const testFile2 = path.resolve(__dirname, `./test-data-2.csv`); const testFile2Buffer = fs.readFileSync(testFile2); - DataLakeService.prototype.downloadFile = jest.fn().mockReturnValue(testFile2Buffer); + BlobStorageService.prototype.readFile = jest.fn().mockReturnValue(testFile2Buffer); const res = await request(app) - .get('/en-GB/dataset/fa07be9d-3495-432d-8c1f-d0fc6daae359/view') + .get(`/en-GB/dataset/${dataset1Id}/revision/by-id/${revision1Id}/import/by-id/${import1Id}/preview`) .query({ page_number: 20 }); - expect(res.status).toBe(500); + expect(res.status).toBe(400); expect(res.body).toEqual({ success: false, - dataset_id: 'fa07be9d-3495-432d-8c1f-d0fc6daae359', + dataset_id: dataset1Id, errors: [ { field: 'page_number', @@ -179,15 +347,15 @@ describe('API 
Endpoints', () => { test('Get file view returns 400 if page_size is too high', async () => { const testFile2 = path.resolve(__dirname, `./test-data-2.csv`); const testFile2Buffer = fs.readFileSync(testFile2); - DataLakeService.prototype.downloadFile = jest.fn().mockReturnValue(testFile2Buffer); + BlobStorageService.prototype.readFile = jest.fn().mockReturnValue(testFile2Buffer); const res = await request(app) - .get('/en-GB/dataset/fa07be9d-3495-432d-8c1f-d0fc6daae359/view') + .get(`/en-GB/dataset/${dataset1Id}/revision/by-id/${revision1Id}/import/by-id/${import1Id}/preview`) .query({ page_size: 1000 }); - expect(res.status).toBe(500); + expect(res.status).toBe(400); expect(res.body).toEqual({ success: false, - dataset_id: 'fa07be9d-3495-432d-8c1f-d0fc6daae359', + dataset_id: dataset1Id, errors: [ { field: 'page_size', @@ -221,15 +389,15 @@ describe('API Endpoints', () => { test('Get file view returns 400 if page_size is too low', async () => { const testFile2 = path.resolve(__dirname, `./test-data-2.csv`); const testFile2Buffer = fs.readFileSync(testFile2); - DataLakeService.prototype.downloadFile = jest.fn().mockReturnValue(testFile2Buffer); + BlobStorageService.prototype.readFile = jest.fn().mockReturnValue(testFile2Buffer); const res = await request(app) - .get('/en-GB/dataset/fa07be9d-3495-432d-8c1f-d0fc6daae359/view') + .get(`/en-GB/dataset/${dataset1Id}/revision/by-id/${revision1Id}/import/by-id/${import1Id}/preview`) .query({ page_size: 1 }); - expect(res.status).toBe(500); + expect(res.status).toBe(400); expect(res.body).toEqual({ success: false, - dataset_id: 'fa07be9d-3495-432d-8c1f-d0fc6daae359', + dataset_id: dataset1Id, errors: [ { field: 'page_size', @@ -262,37 +430,23 @@ describe('API Endpoints', () => { test('Get file rertunrs 200 and complete file data', async () => { const testFile2 = path.resolve(__dirname, `./test-data-2.csv`); + const testFileStream = fs.createReadStream(testFile2); const testFile2Buffer = fs.readFileSync(testFile2); - DataLakeService.prototype.downloadFile = jest.fn().mockReturnValue(testFile2Buffer.toString()); - const dataset = await Dataset.findOneBy({ id: 'fa07be9d-3495-432d-8c1f-d0fc6daae359' }); - if (!dataset) { - expect(dataset).not.toBeNull(); - return; - } - - const res = await request(app).get('/en-GB/dataset/fa07be9d-3495-432d-8c1f-d0fc6daae359/'); - expect(res.status).toBe(200); - const expectedDTO = await datasetToDatasetDTO(dataset); - expect(res.body).toEqual(expectedDTO); - }); - - test('Get csv file rertunrs 200 and complete file data', async () => { - const testFile2 = path.resolve(__dirname, `./test-data-2.csv`); - const testFile2Buffer = fs.readFileSync(testFile2); - DataLakeService.prototype.downloadFile = jest.fn().mockReturnValue(testFile2Buffer.toString()); - - const res = await request(app).get('/en-GB/dataset/fa07be9d-3495-432d-8c1f-d0fc6daae359/csv'); + BlobStorageService.prototype.getReadableStream = jest.fn().mockReturnValue(testFileStream); + const res = await request(app).get( + `/en-GB/dataset/${dataset1Id}/revision/by-id/${revision1Id}/import/by-id/${import1Id}/raw` + ); expect(res.status).toBe(200); expect(res.text).toEqual(testFile2Buffer.toString()); }); - test('Get file view returns 200 and correct page data', async () => { + test('Get preview of an import returns 200 and correct page data', async () => { const testFile2 = path.resolve(__dirname, `./test-data-2.csv`); const testFile1Buffer = fs.readFileSync(testFile2); - DataLakeService.prototype.downloadFile = 
jest.fn().mockReturnValue(testFile1Buffer.toString());
+        BlobStorageService.prototype.readFile = jest.fn().mockReturnValue(testFile1Buffer.toString());
         const res = await request(app)
-            .get('/en-GB/dataset/fa07be9d-3495-432d-8c1f-d0fc6daae359/view')
+            .get(`/en-GB/dataset/${dataset1Id}/revision/by-id/${revision1Id}/import/by-id/${import1Id}/preview`)
             .query({ page_number: 2, page_size: 100 });
         expect(res.status).toBe(200);
         expect(res.body.current_page).toBe(2);
@@ -303,30 +457,20 @@ describe('API Endpoints', () => {
         expect(res.body.data[99]).toEqual(['200', 'QhBxdmrUPb', '3256099', '2026-12-17']);
     });
 
-    test('Get file view returns 404 when a non-existant file is requested', async () => {
-        DataLakeService.prototype.downloadFile = jest.fn().mockReturnValue(null);
-
-        const res = await request(app).get('/en-GB/dataset/test-data-4.csv/csv');
-        expect(res.status).toBe(400);
-        expect(res.body).toEqual({ message: 'Dataset ID is not valid' });
+    test('Get preview of an import returns 404 when a non-existent import is requested', async () => {
+        const res = await request(app).get(
+            `/en-GB/dataset/${dataset1Id}/revision/by-id/${revision1Id}/import/by-id/97C3F48F-127C-4317-B39C-87350F222310/preview`
+        );
+        expect(res.status).toBe(404);
+        expect(res.body).toEqual({ message: 'Import not found.' });
     });
 
-    test('Get file view returns 404 when a not valid UUID is supplied', async () => {
-        DataLakeService.prototype.downloadFile = jest.fn().mockReturnValue(null);
-
-        const res = await request(app).get('/en-GB/dataset/test-data-4.csv/view');
+    test('Get file view returns 400 when an invalid UUID is supplied', async () => {
+        const res = await request(app).get(`/en-GB/dataset/NOT-VALID-ID`);
         expect(res.status).toBe(400);
         expect(res.body).toEqual({ message: 'Dataset ID is not valid' });
     });
 
-    test('Get file view returns 404 when a UUID is not present', async () => {
-        DataLakeService.prototype.downloadFile = jest.fn().mockReturnValue(null);
-
-        const res = await request(app).get('/en-GB/dataset/fa07be9d-3495-432d-8c1f-d0fc6daae111/view');
-        expect(res.status).toBe(404);
-        expect(res.body).toEqual({ message: 'Dataset not found... 
Dataset ID not found in Database' }); - }); - afterAll(async () => { await dbManager.getDataSource().dropDatabase(); }); diff --git a/test/test-data-source.ts b/test/test-data-source.ts index fe52d31..ff02af2 100644 --- a/test/test-data-source.ts +++ b/test/test-data-source.ts @@ -2,13 +2,15 @@ import 'reflect-metadata'; import { DataSourceOptions } from 'typeorm'; import * as dotenv from 'dotenv'; -import { Dataset } from '../src/entity/dataset'; -import { Datafile } from '../src/entity/datafile'; -import { LookupTable } from '../src/entity/lookuptable'; -import { DatasetTitle } from '../src/entity/dataset_title'; -import { DatasetColumn } from '../src/entity/dataset_column'; -import { DatasetDescription } from '../src/entity/dataset_description'; -import { ColumnTitle } from '../src/entity/column_title'; +import { Dataset } from '../src/entity2/dataset'; +import { DatasetInfo } from '../src/entity2/dataset_info'; +import { Revision } from '../src/entity2/revision'; +import { Import } from '../src/entity2/import'; +import { CsvInfo } from '../src/entity2/csv_info'; +import { Source } from '../src/entity2/source'; +import { Dimension } from '../src/entity2/dimension'; +import { DimensionInfo } from '../src/entity2/dimension_info'; +import { Users } from '../src/entity2/users'; dotenv.config(); @@ -18,6 +20,6 @@ export const datasourceOptions: DataSourceOptions = { database: ':memory:', synchronize: true, logging: false, - entities: [Dataset, Datafile, LookupTable, DatasetTitle, DatasetDescription, DatasetColumn, ColumnTitle], + entities: [Dataset, DatasetInfo, Revision, Import, CsvInfo, Source, Dimension, DimensionInfo, Users], subscribers: [] }; From 3ec77c6e978be3f66a7fc89204c60f0b22094606 Mon Sep 17 00:00:00 2001 From: Jamie Maynard Date: Fri, 6 Sep 2024 13:27:17 +0100 Subject: [PATCH 4/5] Refactor following comments from PR --- .prettierrc | 5 +- src/controllers/blob-storage.ts | 5 +- src/controllers/csv-processor.ts | 74 +++++------ src/data-source.ts | 2 +- src/database-manager.ts | 20 +-- src/{dtos2 => dtos}/dataset-dto.ts | 124 +++++++++--------- src/{dtos2 => dtos}/error.ts | 0 src/{dtos2 => dtos}/filelist.ts | 0 src/{dtos2 => dtos}/processedcsv.ts | 0 src/{dtos2 => dtos}/upload-dto.ts | 0 src/{dtos2 => dtos}/view-dto.ts | 0 src/{entity2 => entities}/csv_info.ts | 0 src/{entity2 => entities}/dataset.ts | 21 ++- src/{entity2 => entities}/dataset_info.ts | 0 src/{entity2 => entities}/dimension.ts | 16 +-- src/{entity2 => entities}/dimension_info.ts | 0 .../dimension_type.ts} | 0 src/{entity2 => entities}/import.ts | 13 +- src/entities/revision.ts | 79 +++++++++++ src/{entity2 => entities}/source.ts | 8 +- src/entities/user.ts | 49 +++++++ src/entity2/revision.ts | 70 ---------- src/entity2/users.ts | 73 ----------- src/route/dataset-route.ts | 95 +++++++++----- test/dataset.test.ts | 75 +++++++---- test/test-data-source.ts | 20 +-- 26 files changed, 390 insertions(+), 359 deletions(-) rename src/{dtos2 => dtos}/dataset-dto.ts (78%) rename src/{dtos2 => dtos}/error.ts (100%) rename src/{dtos2 => dtos}/filelist.ts (100%) rename src/{dtos2 => dtos}/processedcsv.ts (100%) rename src/{dtos2 => dtos}/upload-dto.ts (100%) rename src/{dtos2 => dtos}/view-dto.ts (100%) rename src/{entity2 => entities}/csv_info.ts (100%) rename src/{entity2 => entities}/dataset.ts (76%) rename src/{entity2 => entities}/dataset_info.ts (100%) rename src/{entity2 => entities}/dimension.ts (77%) rename src/{entity2 => entities}/dimension_info.ts (100%) rename src/{entity2/dimension_types.ts => 
entities/dimension_type.ts} (100%) rename src/{entity2 => entities}/import.ts (85%) create mode 100644 src/entities/revision.ts rename src/{entity2 => entities}/source.ts (89%) create mode 100644 src/entities/user.ts delete mode 100644 src/entity2/revision.ts delete mode 100644 src/entity2/users.ts diff --git a/.prettierrc b/.prettierrc index 9c4858d..0706fd1 100644 --- a/.prettierrc +++ b/.prettierrc @@ -2,8 +2,5 @@ "semi": true, "trailingComma": "none", "singleQuote": true, - "printWidth": 120, - "rules": { - "no-inline-comments": "off" - } + "printWidth": 120 } diff --git a/src/controllers/blob-storage.ts b/src/controllers/blob-storage.ts index 72e0c0b..b8c249c 100644 --- a/src/controllers/blob-storage.ts +++ b/src/controllers/blob-storage.ts @@ -30,9 +30,6 @@ export class BlobStorageService { private readonly containerClient: ContainerClient; public constructor() { - logger.debug( - `Creating BlobServiceClient and ContainerClient for blob storage with account name '${accountName}' and container name '${containerName}'` - ); const sharedKeyCredential = new StorageSharedKeyCredential(accountName, accountKey); this.blobServiceClient = new BlobServiceClient( `https://${accountName}.blob.core.windows.net`, @@ -56,6 +53,7 @@ export class BlobStorageService { if (fileContent === undefined) { throw new Error('File content is undefined'); } + logger.info(`Uploading file with file '${fileName}' to blob storage`); const blockBlobClient = this.containerClient.getBlockBlobClient(fileName); @@ -86,6 +84,7 @@ export class BlobStorageService { } public async readFile(fileName: string) { + logger.info(`Getting file with file '${fileName}' to blob storage`); const blockBlobClient = this.containerClient.getBlockBlobClient(fileName); const downloadBlockBlobResponse = await blockBlobClient.download(); const readableStreamBody = downloadBlockBlobResponse.readableStreamBody; diff --git a/src/controllers/csv-processor.ts b/src/controllers/csv-processor.ts index c910b29..453912a 100644 --- a/src/controllers/csv-processor.ts +++ b/src/controllers/csv-processor.ts @@ -5,11 +5,11 @@ import { Readable } from 'stream'; import { parse } from 'csv'; import { ENGLISH, WELSH, logger, t } from '../app'; -import { DatasetDTO, ImportDTO } from '../dtos2/dataset-dto'; -import { Error } from '../dtos2/error'; -import { ViewStream, ViewDTO, ViewErrDTO } from '../dtos2/view-dto'; -import { Dataset } from '../entity2/dataset'; -import { Import } from '../entity2/import'; +import { DatasetDTO, ImportDTO } from '../dtos/dataset-dto'; +import { Error } from '../dtos/error'; +import { ViewStream, ViewDTO, ViewErrDTO } from '../dtos/view-dto'; +import { Dataset } from '../entities/dataset'; +import { Import } from '../entities/import'; import { BlobStorageService } from './blob-storage'; import { DataLakeService } from './datalake'; @@ -119,38 +119,34 @@ function validateParams(page_number: number, max_page_number: number, page_size: export const uploadCSVToBlobStorage = async (fileStream: Readable, filetype: string): Promise => { const blobStorageService = new BlobStorageService(); - if (fileStream) { - const importRecord = new Import(); - importRecord.id = randomUUID(); - importRecord.mime_type = filetype; - if (filetype === 'text/csv') { - importRecord.filename = `${importRecord.id}.csv`; - } else { - importRecord.filename = `${importRecord.id}.zip`; - } - try { - const promisedHash = hashReadableStream(fileStream) - .then((hash) => { - return hash.toString(); - }) - .catch((error) => { - throw new Error(`Error hashing stream: 
${error}`); - }); - await blobStorageService.uploadFile(`${importRecord.id}.csv`, fileStream); - const resolvedHash = await promisedHash; - if (resolvedHash) importRecord.hash = resolvedHash; - importRecord.uploaded_at = new Date(Date.now()); - importRecord.type = 'Draft'; - importRecord.location = 'BlobStorage'; - return importRecord; - } catch (err) { - logger.error(err); - throw new Error('Error processing file upload to blob storage'); - } - } else { + if(!fileStream) { logger.error('No buffer to upload to blob storage'); throw new Error('No buffer to upload to blob storage'); } + const importRecord = new Import(); + importRecord.id = randomUUID(); + importRecord.mime_type = filetype; + const extension = filetype === 'text/csv' ? 'csv' : 'zip'; + importRecord.filename = `${importRecord.id}.${extension}`; + try { + const promisedHash = hashReadableStream(fileStream) + .then((hash) => { + return hash.toString(); + }) + .catch((error) => { + throw new Error(`Error hashing stream: ${error}`); + }); + await blobStorageService.uploadFile(`${importRecord.filename}`, fileStream); + const resolvedHash = await promisedHash; + if (resolvedHash) importRecord.hash = resolvedHash; + importRecord.uploaded_at = new Date(Date.now()); + importRecord.type = 'Draft'; + importRecord.location = 'BlobStorage'; + return importRecord; + } catch (err) { + logger.error(err); + throw new Error('Error processing file upload to blob storage'); + } }; export const uploadCSVBufferToBlobStorage = async (fileBuffer: Buffer, filetype: string): Promise => { @@ -180,8 +176,8 @@ async function processCSVData( delimiter: ',' }).toArray()) as string[][]; const csvheaders = dataArray.shift(); - const total_pages = Math.ceil(dataArray.length / size); - const errors = validateParams(page, total_pages, size); + const totalPages = Math.ceil(dataArray.length / size); + const errors = validateParams(page, totalPages, size); if (errors.length > 0) { return { success: false, @@ -191,11 +187,11 @@ async function processCSVData( } const csvdata = paginate(dataArray, page, size); - const pages = setupPagination(page, total_pages); + const pages = setupPagination(page, totalPages); const end_record = () => { if (size > dataArray.length) { return dataArray.length; - } else if (page === total_pages) { + } else if (page === totalPages) { return dataArray.length; } else { return page * size; @@ -214,7 +210,7 @@ async function processCSVData( }, pages, page_size: size, - total_pages, + total_pages: totalPages, headers: csvheaders, data: csvdata }; diff --git a/src/data-source.ts b/src/data-source.ts index 3e9afb5..c264298 100644 --- a/src/data-source.ts +++ b/src/data-source.ts @@ -18,7 +18,7 @@ export const datasourceOptions: DataSourceOptions = { ssl: true, synchronize: false, logging: false, - entities: [`${__dirname}/entity2/*.ts`], + entities: [`${__dirname}/entities/*.ts`], migrations: [`${__dirname}/migration/*.ts`], subscribers: [] }; diff --git a/src/database-manager.ts b/src/database-manager.ts index f4f7927..27d3908 100644 --- a/src/database-manager.ts +++ b/src/database-manager.ts @@ -2,15 +2,15 @@ import 'reflect-metadata'; import { DataSource, DataSourceOptions, EntityManager } from 'typeorm'; import { Logger } from 'pino'; -import { Dataset } from './entity2/dataset'; -import { DatasetInfo } from './entity2/dataset_info'; -import { Revision } from './entity2/revision'; -import { Import } from './entity2/import'; -import { CsvInfo } from './entity2/csv_info'; -import { Source } from './entity2/source'; -import { Dimension } from 
'./entity2/dimension'; -import { DimensionInfo } from './entity2/dimension_info'; -import { Users } from './entity2/users'; +import { Dataset } from './entities/dataset'; +import { DatasetInfo } from './entities/dataset_info'; +import { Revision } from './entities/revision'; +import { Import } from './entities/import'; +import { CsvInfo } from './entities/csv_info'; +import { Source } from './entities/source'; +import { Dimension } from './entities/dimension'; +import { DimensionInfo } from './entities/dimension_info'; +import { User } from './entities/user'; class DatabaseManager { private datasourceOptions: DataSourceOptions; @@ -39,7 +39,7 @@ class DatabaseManager { async initializeDataSource() { this.dataSource = new DataSource({ ...this.datasourceOptions, - entities: [Dataset, DatasetInfo, Revision, Import, CsvInfo, Source, Dimension, DimensionInfo, Users] + entities: [Dataset, DatasetInfo, Revision, Import, CsvInfo, Source, Dimension, DimensionInfo, User] }); await this.dataSource diff --git a/src/dtos2/dataset-dto.ts b/src/dtos/dataset-dto.ts similarity index 78% rename from src/dtos2/dataset-dto.ts rename to src/dtos/dataset-dto.ts index b7ff7bc..e442ec4 100644 --- a/src/dtos2/dataset-dto.ts +++ b/src/dtos/dataset-dto.ts @@ -1,10 +1,10 @@ -import { Dataset } from '../entity2/dataset'; -import { Dimension } from '../entity2/dimension'; -import { DimensionInfo } from '../entity2/dimension_info'; -import { Source } from '../entity2/source'; -import { Import } from '../entity2/import'; -import { Revision } from '../entity2/revision'; -import { DatasetInfo } from '../entity2/dataset_info'; +import { Dataset } from '../entities/dataset'; +import { Dimension } from '../entities/dimension'; +import { DimensionInfo } from '../entities/dimension_info'; +import { Source } from '../entities/source'; +import { Import } from '../entities/import'; +import { Revision } from '../entities/revision'; +import { DatasetInfo } from '../entities/dataset_info'; export class DatasetInfoDTO { language?: string; @@ -43,8 +43,8 @@ export class DimensionDTO { const dimDto = new DimensionDTO(); dimDto.id = dimension.id; dimDto.type = dimension.type; - dimDto.start_revision_id = (await dimension.start_revision).id; - dimDto.finish_revision_id = (await dimension.finish_revision)?.id || ''; + dimDto.start_revision_id = (await dimension.startRevision).id; + dimDto.finish_revision_id = (await dimension.finishRevision)?.id || ''; dimDto.validator = dimension.validator; dimDto.dimensionInfo = (await dimension.dimensionInfo).map((dimInfo: DimensionInfo) => { const infoDto = new DimensionInfoDTO(); @@ -60,7 +60,7 @@ export class DimensionDTO { sourceDto.id = source.id; sourceDto.import_id = (await source.import).id; sourceDto.revision_id = (await source.revision).id; - sourceDto.csv_field = source.csv_field; + sourceDto.csv_field = source.csvField; sourceDto.action = source.action; return sourceDto; }) @@ -97,7 +97,7 @@ export class ImportDTO { sourceDto.id = source.id; sourceDto.import_id = (await source.import).id; sourceDto.revision_id = (await source.revision).id; - sourceDto.csv_field = source.csv_field; + sourceDto.csv_field = source.csvField; sourceDto.action = source.action; return sourceDto; }) @@ -122,15 +122,15 @@ export class RevisionDTO { static async fromRevision(revision: Revision): Promise { const revDto = new RevisionDTO(); revDto.id = revision.id; - revDto.revision_index = revision.revision_index; + revDto.revision_index = revision.revisionIndex; revDto.dataset_id = (await revision.dataset).id; - 
revDto.creation_date = revision.creation_date.toISOString(); - revDto.previous_revision_id = (await revision.previous_revision)?.id || ''; - revDto.online_cube_filename = revision.online_cube_filename; - revDto.publish_date = revision.publish_date?.toISOString() || ''; - revDto.approval_date = revision.approval_date?.toISOString() || ''; - revDto.approved_by = (await revision.approved_by)?.name || undefined; - revDto.created_by = (await revision.created_by).name; + revDto.creation_date = revision.creationDate.toISOString(); + revDto.previous_revision_id = (await revision.previousRevision)?.id || ''; + revDto.online_cube_filename = revision.onlineCubeFilename; + revDto.publish_date = revision.publishDate?.toISOString() || ''; + revDto.approval_date = revision.approvalDate?.toISOString() || ''; + revDto.approved_by = (await revision.approvedBy)?.name || undefined; + revDto.created_by = (await revision.createdBy).name; revDto.imports = await Promise.all( (await revision.imports).map(async (imp: Import) => { const impDto = new ImportDTO(); @@ -163,7 +163,7 @@ export class DatasetDTO { const dto = new DatasetDTO(); dto.id = dataset.id; dto.creation_date = dataset.creation_date.toISOString(); - dto.created_by = (await dataset.created_by).name; + dto.created_by = (await dataset.createdBy).name; dto.live = dataset.live?.toISOString() || ''; dto.archive = dataset.archive?.toISOString() || ''; dto.datasetInfo = (await dataset.datasetInfo).map((datasetInfo: DatasetInfo) => { @@ -182,7 +182,7 @@ export class DatasetDTO { const dto = new DatasetDTO(); dto.id = dataset.id; dto.creation_date = dataset.creation_date.toISOString(); - dto.created_by = (await dataset.created_by).name; + dto.created_by = (await dataset.createdBy).name; dto.live = dataset.live?.toISOString() || ''; dto.archive = dataset.archive?.toISOString() || ''; dto.datasetInfo = (await dataset.datasetInfo).map((datasetInfo: DatasetInfo) => { @@ -197,8 +197,8 @@ export class DatasetDTO { const dimDto = new DimensionDTO(); dimDto.id = dimension.id; dimDto.type = dimension.type; - dimDto.start_revision_id = (await dimension.start_revision).id; - dimDto.finish_revision_id = (await dimension.finish_revision)?.id || undefined; + dimDto.start_revision_id = (await dimension.startRevision).id; + dimDto.finish_revision_id = (await dimension.finishRevision)?.id || undefined; dimDto.validator = dimension.validator; dimDto.dimensionInfo = (await dimension.dimensionInfo).map((dimInfo: DimensionInfo) => { const infoDto = new DimensionInfoDTO(); @@ -214,7 +214,7 @@ export class DatasetDTO { sourceDto.id = source.id; sourceDto.import_id = (await source.import).id; sourceDto.revision_id = (await source.revision).id; - sourceDto.csv_field = source.csv_field; + sourceDto.csv_field = source.csvField; sourceDto.action = source.action; return sourceDto; }) @@ -226,15 +226,15 @@ export class DatasetDTO { (await dataset.revisions).map(async (revision: Revision) => { const revDto = new RevisionDTO(); revDto.id = revision.id; - revDto.revision_index = revision.revision_index; + revDto.revision_index = revision.revisionIndex; revDto.dataset_id = (await revision.dataset).id; - revDto.creation_date = revision.creation_date.toISOString(); - revDto.previous_revision_id = (await revision.previous_revision)?.id || undefined; - revDto.online_cube_filename = revision.online_cube_filename; - revDto.publish_date = revision.publish_date?.toISOString() || ''; - revDto.approval_date = revision.approval_date?.toISOString() || ''; - revDto.approved_by = (await 
revision.approved_by)?.name || undefined; - revDto.created_by = (await revision.created_by)?.name; + revDto.creation_date = revision.creationDate.toISOString(); + revDto.previous_revision_id = (await revision.previousRevision)?.id || undefined; + revDto.online_cube_filename = revision.onlineCubeFilename; + revDto.publish_date = revision.publishDate?.toISOString() || ''; + revDto.approval_date = revision.approvalDate?.toISOString() || ''; + revDto.approved_by = (await revision.approvedBy)?.name || undefined; + revDto.created_by = (await revision.createdBy)?.name; revDto.imports = await Promise.all( (await revision.imports).map(async (imp: Import) => { const impDto = new ImportDTO(); @@ -252,7 +252,7 @@ export class DatasetDTO { sourceDto.id = source.id; sourceDto.import_id = (await source.import).id; sourceDto.revision_id = (await source.revision).id; - sourceDto.csv_field = source.csv_field; + sourceDto.csv_field = source.csvField; sourceDto.action = source.action; return sourceDto; }) @@ -270,7 +270,7 @@ export class DatasetDTO { const dto = new DatasetDTO(); dto.id = dataset.id; dto.creation_date = dataset.creation_date.toISOString(); - dto.created_by = (await dataset.created_by).name; + dto.created_by = (await dataset.createdBy).name; dto.live = dataset.live?.toISOString() || ''; dto.archive = dataset.archive?.toISOString() || ''; dto.datasetInfo = (await dataset.datasetInfo).map((datasetInfo: DatasetInfo) => { @@ -285,15 +285,15 @@ export class DatasetDTO { (await dataset.revisions).map(async (revision: Revision) => { const revDto = new RevisionDTO(); revDto.id = revision.id; - revDto.revision_index = revision.revision_index; + revDto.revision_index = revision.revisionIndex; revDto.dataset_id = (await revision.dataset).id; - revDto.creation_date = revision.creation_date.toISOString(); - revDto.previous_revision_id = (await revision.previous_revision).id; - revDto.online_cube_filename = revision.online_cube_filename; - revDto.publish_date = revision.publish_date?.toISOString() || ''; - revDto.approval_date = revision.approval_date?.toISOString() || ''; - revDto.approved_by = (await revision.approved_by)?.name || ''; - revDto.created_by = (await revision.created_by)?.name || ''; + revDto.creation_date = revision.creationDate.toISOString(); + revDto.previous_revision_id = (await revision.previousRevision).id; + revDto.online_cube_filename = revision.onlineCubeFilename; + revDto.publish_date = revision.publishDate?.toISOString() || ''; + revDto.approval_date = revision.approvalDate?.toISOString() || ''; + revDto.approved_by = (await revision.approvedBy)?.name || ''; + revDto.created_by = (await revision.createdBy)?.name || ''; revDto.imports = []; return revDto; }) @@ -305,7 +305,7 @@ export class DatasetDTO { const dto = new DatasetDTO(); dto.id = dataset.id; dto.creation_date = dataset.creation_date.toISOString(); - dto.created_by = (await dataset.created_by).name; + dto.created_by = (await dataset.createdBy).name; dto.live = dataset.live?.toISOString() || ''; dto.archive = dataset.archive?.toISOString() || ''; dto.datasetInfo = (await dataset.datasetInfo).map((datasetInfo: DatasetInfo) => { @@ -320,14 +320,14 @@ export class DatasetDTO { (await dataset.revisions).map(async (revision: Revision) => { const revDto = new RevisionDTO(); revDto.id = revision.id; - revDto.revision_index = revision.revision_index; - revDto.creation_date = revision.creation_date.toISOString(); - revDto.previous_revision_id = (await revision.previous_revision)?.id || undefined; - revDto.online_cube_filename = 
revision.online_cube_filename; - revDto.publish_date = revision.publish_date?.toISOString() || ''; - revDto.approval_date = revision.approval_date?.toISOString() || ''; - revDto.approved_by = (await revision.approved_by)?.name || undefined; - revDto.created_by = (await revision.created_by)?.name; + revDto.revision_index = revision.revisionIndex; + revDto.creation_date = revision.creationDate.toISOString(); + revDto.previous_revision_id = (await revision.previousRevision)?.id || undefined; + revDto.online_cube_filename = revision.onlineCubeFilename; + revDto.publish_date = revision.publishDate?.toISOString() || ''; + revDto.approval_date = revision.approvalDate?.toISOString() || ''; + revDto.approved_by = (await revision.approvedBy)?.name || undefined; + revDto.created_by = (await revision.createdBy)?.name; revDto.imports = await Promise.all( (await revision.imports).map((imp: Import) => { const impDto = new ImportDTO(); @@ -351,7 +351,7 @@ export class DatasetDTO { const dto = new DatasetDTO(); dto.id = dataset.id; dto.creation_date = dataset.creation_date.toISOString(); - dto.created_by = (await dataset.created_by).name; + dto.created_by = (await dataset.createdBy).name; dto.live = dataset.live?.toISOString() || ''; dto.archive = dataset.archive?.toISOString() || ''; dto.datasetInfo = (await dataset.datasetInfo).map((datasetInfo: DatasetInfo) => { @@ -366,8 +366,8 @@ export class DatasetDTO { const dimDto = new DimensionDTO(); dimDto.id = dimension.id; dimDto.type = dimension.type; - dimDto.start_revision_id = (await dimension.start_revision).id; - dimDto.finish_revision_id = (await dimension.finish_revision)?.id || undefined; + dimDto.start_revision_id = (await dimension.startRevision).id; + dimDto.finish_revision_id = (await dimension.finishRevision)?.id || undefined; dimDto.validator = dimension.validator; dimDto.dimensionInfo = (await dimension.dimensionInfo).map((dimInfo: DimensionInfo) => { const infoDto = new DimensionInfoDTO(); @@ -385,15 +385,15 @@ export class DatasetDTO { (await dataset.revisions).map(async (revision: Revision) => { const revDto = new RevisionDTO(); revDto.id = revision.id; - revDto.revision_index = revision.revision_index; + revDto.revision_index = revision.revisionIndex; revDto.dataset_id = (await revision.dataset).id; - revDto.creation_date = revision.creation_date.toISOString(); - revDto.previous_revision_id = (await revision.previous_revision)?.id || undefined; - revDto.online_cube_filename = revision.online_cube_filename; - revDto.publish_date = revision.publish_date?.toISOString() || ''; - revDto.approval_date = revision.approval_date?.toISOString() || ''; - revDto.approved_by = (await revision.approved_by)?.name || ''; - revDto.created_by = (await revision.created_by)?.name || ''; + revDto.creation_date = revision.creationDate.toISOString(); + revDto.previous_revision_id = (await revision.previousRevision)?.id || undefined; + revDto.online_cube_filename = revision.onlineCubeFilename; + revDto.publish_date = revision.publishDate?.toISOString() || ''; + revDto.approval_date = revision.approvalDate?.toISOString() || ''; + revDto.approved_by = (await revision.approvedBy)?.name || ''; + revDto.created_by = (await revision.createdBy)?.name || ''; revDto.imports = []; // Imports are intentionally empty in this method as per original code return revDto; }) diff --git a/src/dtos2/error.ts b/src/dtos/error.ts similarity index 100% rename from src/dtos2/error.ts rename to src/dtos/error.ts diff --git a/src/dtos2/filelist.ts b/src/dtos/filelist.ts similarity index 
100% rename from src/dtos2/filelist.ts rename to src/dtos/filelist.ts diff --git a/src/dtos2/processedcsv.ts b/src/dtos/processedcsv.ts similarity index 100% rename from src/dtos2/processedcsv.ts rename to src/dtos/processedcsv.ts diff --git a/src/dtos2/upload-dto.ts b/src/dtos/upload-dto.ts similarity index 100% rename from src/dtos2/upload-dto.ts rename to src/dtos/upload-dto.ts diff --git a/src/dtos2/view-dto.ts b/src/dtos/view-dto.ts similarity index 100% rename from src/dtos2/view-dto.ts rename to src/dtos/view-dto.ts diff --git a/src/entity2/csv_info.ts b/src/entities/csv_info.ts similarity index 100% rename from src/entity2/csv_info.ts rename to src/entities/csv_info.ts diff --git a/src/entity2/dataset.ts b/src/entities/dataset.ts similarity index 76% rename from src/entity2/dataset.ts rename to src/entities/dataset.ts index 5a8cbbd..132e3d1 100644 --- a/src/entity2/dataset.ts +++ b/src/entities/dataset.ts @@ -1,6 +1,15 @@ -import { Entity, PrimaryGeneratedColumn, Column, BaseEntity, ManyToOne, OneToMany, JoinColumn } from 'typeorm'; - -import { Users } from './users'; +import { + Entity, + PrimaryGeneratedColumn, + Column, + CreateDateColumn, + BaseEntity, + ManyToOne, + OneToMany, + JoinColumn +} from 'typeorm'; + +import { User } from './user'; // eslint-disable-next-line import/no-cycle import { Revision } from './revision'; // eslint-disable-next-line import/no-cycle @@ -13,12 +22,12 @@ export class Dataset extends BaseEntity { @PrimaryGeneratedColumn('uuid') id: string; - @Column({ type: process.env.NODE_ENV === 'test' ? 'datetime' : 'timestamp', default: () => 'CURRENT_TIMESTAMP' }) + @CreateDateColumn() creation_date: Date; - @ManyToOne(() => Users) + @ManyToOne(() => User) @JoinColumn({ name: 'created_by' }) - created_by: Promise; + createdBy: Promise; @Column({ type: process.env.NODE_ENV === 'test' ? 'datetime' : 'timestamp', nullable: true }) live: Date; diff --git a/src/entity2/dataset_info.ts b/src/entities/dataset_info.ts similarity index 100% rename from src/entity2/dataset_info.ts rename to src/entities/dataset_info.ts diff --git a/src/entity2/dimension.ts b/src/entities/dimension.ts similarity index 77% rename from src/entity2/dimension.ts rename to src/entities/dimension.ts index b241508..cea3fbb 100644 --- a/src/entity2/dimension.ts +++ b/src/entities/dimension.ts @@ -7,7 +7,7 @@ import { Revision } from './revision'; // eslint-disable-next-line import/no-cycle import { DimensionInfo } from './dimension_info'; import { Source } from './source'; -import { DimensionType } from './dimension_types'; +import { DimensionType } from './dimension_type'; @Entity() export class Dimension extends BaseEntity { @@ -24,26 +24,18 @@ export class Dimension extends BaseEntity { // Replace with actual enum types @Column({ type: process.env.NODE_ENV === 'test' ? 
'text' : 'enum', - enum: [ - DimensionType.RAW, - DimensionType.TEXT, - DimensionType.NUMERIC, - DimensionType.SYMBOL, - DimensionType.LOOKUP_TABLE, - DimensionType.TIME_PERIOD, - DimensionType.TIME_POINT - ], + enum: Object.keys(DimensionType), nullable: false }) type: DimensionType; @ManyToOne(() => Revision) @JoinColumn({ name: 'start_revision_id' }) - start_revision: Promise; + startRevision: Promise; @ManyToOne(() => Revision, { nullable: true }) @JoinColumn({ name: 'finish_revision_id' }) - finish_revision: Promise; + finishRevision: Promise; @Column({ type: 'text', nullable: true }) validator: string; diff --git a/src/entity2/dimension_info.ts b/src/entities/dimension_info.ts similarity index 100% rename from src/entity2/dimension_info.ts rename to src/entities/dimension_info.ts diff --git a/src/entity2/dimension_types.ts b/src/entities/dimension_type.ts similarity index 100% rename from src/entity2/dimension_types.ts rename to src/entities/dimension_type.ts diff --git a/src/entity2/import.ts b/src/entities/import.ts similarity index 85% rename from src/entity2/import.ts rename to src/entities/import.ts index 93e8690..7353cb2 100644 --- a/src/entity2/import.ts +++ b/src/entities/import.ts @@ -1,4 +1,13 @@ -import { Entity, PrimaryGeneratedColumn, Column, BaseEntity, ManyToOne, OneToMany, JoinColumn } from 'typeorm'; +import { + Entity, + PrimaryGeneratedColumn, + Column, + CreateDateColumn, + BaseEntity, + ManyToOne, + OneToMany, + JoinColumn +} from 'typeorm'; // eslint-disable-next-line import/no-cycle import { Revision } from './revision'; @@ -34,7 +43,7 @@ export class Import extends BaseEntity { @Column({ type: 'varchar', length: 255 }) hash: string; - @Column({ type: process.env.NODE_ENV === 'test' ? 'datetime' : 'timestamp', default: () => 'CURRENT_TIMESTAMP' }) + @CreateDateColumn() uploaded_at: Date; @Column({ diff --git a/src/entities/revision.ts b/src/entities/revision.ts new file mode 100644 index 0000000..3265da5 --- /dev/null +++ b/src/entities/revision.ts @@ -0,0 +1,79 @@ +import { + Entity, + PrimaryGeneratedColumn, + Column, + CreateDateColumn, + BaseEntity, + JoinColumn, + OneToMany, + ManyToOne +} from 'typeorm'; + +// eslint-disable-next-line import/no-cycle +import { Dataset } from './dataset'; +import { User } from './user'; +// eslint-disable-next-line import/no-cycle +import { Import } from './import'; + +interface RevisionInterface { + id: string; + revisionIndex: number; + dataset: Promise; + creationDate: Date; + previousRevision: Promise; + onlineCubeFilename: string; + publishDate: Date; + approvalDate: Date; + approvedBy: Promise; + createdBy: Promise; + imports: Promise; +} + +@Entity() +export class Revision extends BaseEntity implements RevisionInterface { + @PrimaryGeneratedColumn('uuid') + id: string; + + @Column({ type: 'int' }) + revisionIndex: number; + + @ManyToOne(() => Dataset, (dataset) => dataset.revisions, { + onDelete: 'CASCADE', + orphanedRowAction: 'delete' + }) + @JoinColumn({ name: 'dataset_id' }) + dataset: Promise; + + @CreateDateColumn({ name: 'creation_date' }) + creationDate: Date; + + @ManyToOne(() => Revision, { + nullable: true, + onDelete: 'CASCADE', + orphanedRowAction: 'delete' + }) + @JoinColumn({ name: 'previous_revision_id' }) + previousRevision: Promise; + + @Column({ name: 'online_cube_filename', type: 'varchar', length: 255, nullable: true }) + onlineCubeFilename: string; + + @Column({ name: 'publish_date', type: process.env.NODE_ENV === 'test' ? 
'datetime' : 'timestamp', nullable: true }) + publishDate: Date; + + @Column({ name: 'approval_date', type: process.env.NODE_ENV === 'test' ? 'datetime' : 'timestamp', nullable: true }) + approvalDate: Date; + + @OneToMany(() => Import, (importEntity) => importEntity.revision, { + cascade: true + }) + imports: Promise; + + @ManyToOne(() => User, { nullable: true }) + @JoinColumn({ name: 'approved_by' }) + approvedBy: Promise; + + @ManyToOne(() => User) + @JoinColumn({ name: 'created_by' }) + createdBy: Promise; +} diff --git a/src/entity2/source.ts b/src/entities/source.ts similarity index 89% rename from src/entity2/source.ts rename to src/entities/source.ts index 8b381a7..0265870 100644 --- a/src/entity2/source.ts +++ b/src/entities/source.ts @@ -38,11 +38,11 @@ export class Source extends BaseEntity { // @JoinColumn({ name: 'lookup_table_revision_id' }) // lookupTableRevision: LookupTableRevision; - @Column({ type: 'int', nullable: false }) - column_index: number; + @Column({ name: 'column_index', type: 'int', nullable: false }) + columnIndex: number; - @Column({ type: 'text' }) - csv_field: string; + @Column({ name: 'csv_field', type: 'text' }) + csvField: string; // Replace with actual enum types @Column({ diff --git a/src/entities/user.ts b/src/entities/user.ts new file mode 100644 index 0000000..43141d6 --- /dev/null +++ b/src/entities/user.ts @@ -0,0 +1,49 @@ +import { Entity, PrimaryGeneratedColumn, Column, BaseEntity } from 'typeorm'; + +@Entity({ name: 'users' }) +export class User extends BaseEntity { + @PrimaryGeneratedColumn('uuid') + id: string; + + @Column({ unique: true }) + email: string; + + @Column({ nullable: true }) + name: string; + + @Column({ nullable: true }) + given_name: string; + + @Column({ nullable: true }) + last_name: string; + + @Column({ + name: 'created_at', + type: process.env.NODE_ENV === 'test' ? 'datetime' : 'timestamp', default: () => 'CURRENT_TIMESTAMP' + }) + createdAt: Date; + + @Column({ + name: 'updated_at', + type: process.env.NODE_ENV === 'test' ? 
'datetime' : 'timestamp', + default: () => 'CURRENT_TIMESTAMP', + onUpdate: 'CURRENT_TIMESTAMP' + }) + updatedAt: Date; + + @Column({ type: 'boolean', default: true }) + active: boolean; + + public static getTestUser(): User { + const user = new User(); + user.id = '12345678-1234-1234-1234-123456789012'; + user.email = 'test@test.com'; + user.name = 'Test User'; + user.given_name = 'Test'; + user.last_name = 'User'; + user.createdAt = new Date(); + user.updatedAt = new Date(); + user.active = true; + return user; + } +} diff --git a/src/entity2/revision.ts b/src/entity2/revision.ts deleted file mode 100644 index 1478294..0000000 --- a/src/entity2/revision.ts +++ /dev/null @@ -1,70 +0,0 @@ -import { Entity, PrimaryGeneratedColumn, Column, BaseEntity, JoinColumn, OneToMany, ManyToOne } from 'typeorm'; - -// eslint-disable-next-line import/no-cycle -import { Dataset } from './dataset'; -import { Users } from './users'; -// eslint-disable-next-line import/no-cycle -import { Import } from './import'; - -interface RevisionInterface { - id: string; - revision_index: number; - dataset: Promise; - creation_date: Date; - previous_revision: Promise; - online_cube_filename: string; - publish_date: Date; - approval_date: Date; - approved_by: Promise; - created_by: Promise; - imports: Promise; -} - -@Entity() -export class Revision extends BaseEntity implements RevisionInterface { - @PrimaryGeneratedColumn('uuid') - id: string; - - @Column({ type: 'int' }) - revision_index: number; - - @ManyToOne(() => Dataset, (dataset) => dataset.revisions, { - onDelete: 'CASCADE', - orphanedRowAction: 'delete' - }) - @JoinColumn({ name: 'dataset_id' }) - dataset: Promise; - - @Column({ type: process.env.NODE_ENV === 'test' ? 'datetime' : 'timestamp', default: () => 'CURRENT_TIMESTAMP' }) - creation_date: Date; - - @ManyToOne(() => Revision, { - nullable: true, - onDelete: 'CASCADE', - orphanedRowAction: 'delete' - }) - @JoinColumn({ name: 'previous_revision_id' }) - previous_revision: Promise; - - @Column({ type: 'varchar', length: 255, nullable: true }) - online_cube_filename: string; - - @Column({ type: process.env.NODE_ENV === 'test' ? 'datetime' : 'timestamp', nullable: true }) - publish_date: Date; - - @Column({ type: process.env.NODE_ENV === 'test' ? 'datetime' : 'timestamp', nullable: true }) - approval_date: Date; - - @OneToMany(() => Import, (importEntity) => importEntity.revision, { - cascade: true - }) - imports: Promise; - - @ManyToOne(() => Users, { nullable: true }) - @JoinColumn({ name: 'approved_by' }) - approved_by: Promise; - - @ManyToOne(() => Users) - @JoinColumn({ name: 'created_by' }) - created_by: Promise; -} diff --git a/src/entity2/users.ts b/src/entity2/users.ts deleted file mode 100644 index 7f61ded..0000000 --- a/src/entity2/users.ts +++ /dev/null @@ -1,73 +0,0 @@ -import { Entity, PrimaryGeneratedColumn, Column, BaseEntity } from 'typeorm'; - -@Entity() -export class Users extends BaseEntity { - @PrimaryGeneratedColumn('uuid') - id: string; - - @Column({ unique: true }) - email: string; - - @Column({ nullable: true, unique: true }) - oidc_subject: string; - - @Column({ nullable: true }) - oidc_issuer: string; - - @Column({ type: 'text', nullable: true }) - access_token: string; - - @Column({ type: 'text', nullable: true }) - refresh_token: string; - - @Column({ type: 'text', nullable: true }) - id_token: string; - - @Column({ type: process.env.NODE_ENV === 'test' ? 
'datetime' : 'timestamp', nullable: true }) - token_expiry: Date; - - @Column({ nullable: true }) - name: string; - - @Column({ nullable: true }) - given_name: string; - - @Column({ nullable: true }) - last_name: string; - - @Column({ nullable: true }) - profile_picture: string; - - @Column({ type: process.env.NODE_ENV === 'test' ? 'datetime' : 'timestamp', default: () => 'CURRENT_TIMESTAMP' }) - created_at: Date; - - @Column({ - type: process.env.NODE_ENV === 'test' ? 'datetime' : 'timestamp', - default: () => 'CURRENT_TIMESTAMP', - onUpdate: 'CURRENT_TIMESTAMP' - }) - updated_at: Date; - - @Column({ type: 'boolean', default: true }) - active: boolean; - - public static getTestUser(): Users { - const user = new Users(); - user.id = '12345678-1234-1234-1234-123456789012'; - user.email = 'test@test.com'; - user.oidc_subject = ''; - user.oidc_issuer = 'localAuth'; - user.access_token = ''; - user.refresh_token = ''; - user.id_token = ''; - user.token_expiry = new Date(); - user.name = 'Test User'; - user.given_name = 'Test'; - user.last_name = 'User'; - user.profile_picture = ''; - user.created_at = new Date(); - user.updated_at = new Date(); - user.active = true; - return user; - } -} diff --git a/src/route/dataset-route.ts b/src/route/dataset-route.ts index a5c1dc4..40615c5 100644 --- a/src/route/dataset-route.ts +++ b/src/route/dataset-route.ts @@ -7,7 +7,7 @@ import { Request, Response, Router } from 'express'; import multer from 'multer'; import pino from 'pino'; -import { ViewErrDTO, ViewDTO, ViewStream } from '../dtos2/view-dto'; +import { ViewErrDTO, ViewDTO, ViewStream } from '../dtos/view-dto'; import { ENGLISH, WELSH, t } from '../app'; import { processCSVFromDatalake, @@ -17,14 +17,14 @@ import { getFileFromBlobStorage, getFileFromDataLake } from '../controllers/csv-processor'; -import { Users } from '../entity2/users'; -import { Dataset } from '../entity2/dataset'; -import { DatasetInfo } from '../entity2/dataset_info'; -import { Dimension } from '../entity2/dimension'; -import { Revision } from '../entity2/revision'; -import { Import } from '../entity2/import'; -import { DatasetTitle, FileDescription } from '../dtos2/filelist'; -import { DatasetDTO, DimensionDTO, RevisionDTO } from '../dtos2/dataset-dto'; +import { User } from '../entities/user'; +import { Dataset } from '../entities/dataset'; +import { DatasetInfo } from '../entities/dataset_info'; +import { Dimension } from '../entities/dimension'; +import { Revision } from '../entities/revision'; +import { Import } from '../entities/import'; +import { DatasetTitle, FileDescription } from '../dtos/filelist'; +import { DatasetDTO, DimensionDTO, RevisionDTO } from '../dtos/dataset-dto'; export const logger = pino({ name: 'StatsWales-Alpha-App: DatasetRoute', @@ -164,13 +164,13 @@ apiRoute.post('/', upload.single('csv'), async (req: Request, res: Response) => const dataset = new Dataset(); dataset.id = randomUUID(); dataset.creation_date = new Date(); - const user = await Users.findOneBy({ id: Users.getTestUser().id }); + const user = await User.findOneBy({ id: User.getTestUser().id }); if (user === null) { throw new Error('Test user not found'); } // TODO change how we handle authentication to get the user on the Backend // We are using a stub test user for all requests at the moment - dataset.created_by = Promise.resolve(user); + dataset.createdBy = Promise.resolve(user); const datasetInfo = new DatasetInfo(); datasetInfo.language = lang; datasetInfo.title = title; @@ -178,9 +178,9 @@ apiRoute.post('/', upload.single('csv'), 
async (req: Request, res: Response) =>
     dataset.datasetInfo = Promise.resolve([datasetInfo]);
     const revision = new Revision();
     revision.dataset = Promise.resolve(dataset);
-    revision.revision_index = 1;
-    revision.creation_date = new Date();
-    revision.created_by = Promise.resolve(user);
+    revision.revisionIndex = 1;
+    revision.creationDate = new Date();
+    revision.createdBy = Promise.resolve(user);
     dataset.revisions = Promise.resolve([revision]);
     importRecord.revision = Promise.resolve(revision);
     revision.imports = Promise.resolve([importRecord]);
@@ -225,6 +225,53 @@ apiRoute.get('/:dataset_id', async (req: Request, res: Response) => {
     res.json(dto);
 });
 
+// GET /api/dataset/:dataset_id/view
+// Returns a view of the data file attached to the import
+apiRoute.get('/:dataset_id/view', async (req: Request, res: Response) => {
+    const datasetID: string = req.params.dataset_id;
+    const dataset = await validateDataset(datasetID, res);
+    if (!dataset) return;
+    const latestRevision = await Revision.find({
+        where: { dataset },
+        order: { creationDate: 'DESC' },
+        take: 1
+    });
+    if (latestRevision.length === 0) {
+        logger.error('Unable to find the last revision');
+        res.status(404);
+        res.json({ message: 'No revision found for dataset' });
+        return;
+    }
+    const latestImport = await Import.findOne({
+        where: [{ revision: latestRevision[0] }],
+        order: { uploaded_at: 'DESC' }
+    });
+    if (!latestImport) {
+        logger.error('Unable to find the last import record');
+        res.status(404);
+        res.json({ message: 'No import record found for dataset' });
+        return;
+    }
+    const page_number_str: string = req.query.page_number || req.body?.page_number;
+    const page_size_str: string = req.query.page_size || req.body?.page_size;
+    const page_number: number = Number.parseInt(page_number_str, 10) || 1;
+    const page_size: number = Number.parseInt(page_size_str, 10) || DEFAULT_PAGE_SIZE;
+    let processedCSV: ViewErrDTO | ViewDTO;
+    if (latestImport.location === 'BlobStorage') {
+        processedCSV = await processCSVFromBlobStorage(dataset, latestImport, page_number, page_size);
+    } else if (latestImport.location === 'Datalake') {
+        processedCSV = await processCSVFromDatalake(dataset, latestImport, page_number, page_size);
+    } else {
+        res.status(400);
+        res.json({ message: 'Import location not supported.' });
+        return;
+    }
+    if (!processedCSV.success) {
+        res.status(400);
+    }
+    res.json(processedCSV);
+});
+
 // GET /api/dataset/:dataset_id/dimension/id/:dimension_id
 // Returns details of a dimension with its sources and imports
 apiRoute.get('/:dataset_id/dimension/by-id/:dimension_id', async (req: Request, res: Response) => {
@@ -331,23 +378,3 @@ apiRoute.get(
         });
     }
 );
-
-// apiRoute.get('/:dataset/view', async (req: Request, res: Response) => {
-//     const datasetID = req.params.dataset;
-//     if (!checkDatasetID(datasetID, res)) return;
-//     const dataset = await Dataset.findOneBy({ id: datasetID });
-//     if (dataset === undefined || dataset === null) {
-//         res.status(404);
-//         res.json({ message: 'Dataset not found... 
Dataset ID not found in Database' }); -// return; -// } -// const page_number_str: string = req.query.page_number || req.body?.page_number; -// const page_size_str: string = req.query.page_size || req.body?.page_size; -// const page_number: number = Number.parseInt(page_number_str, 10) || 1; -// const page_size: number = Number.parseInt(page_size_str, 10) || DEFAULT_PAGE_SIZE; -// const processedCSV = await processCSVFromDatalake(dataset, page_number, page_size); -// if (!processedCSV.success) { -// res.status(500); -// } -// res.json(processedCSV); -// }); diff --git a/test/dataset.test.ts b/test/dataset.test.ts index 79aeeb7..57d2cfd 100644 --- a/test/dataset.test.ts +++ b/test/dataset.test.ts @@ -7,18 +7,18 @@ import request from 'supertest'; import { DataLakeService } from '../src/controllers/datalake'; import { BlobStorageService } from '../src/controllers/blob-storage'; import app, { ENGLISH, WELSH, t, dbManager, databaseManager } from '../src/app'; -import { Dataset } from '../src/entity2/dataset'; -import { DatasetInfo } from '../src/entity2/dataset_info'; -import { Revision } from '../src/entity2/revision'; -import { Import } from '../src/entity2/import'; -import { CsvInfo } from '../src/entity2/csv_info'; -import { Source } from '../src/entity2/source'; -import { Dimension } from '../src/entity2/dimension'; -import { DimensionType } from '../src/entity2/dimension_types'; -import { DimensionInfo } from '../src/entity2/dimension_info'; -import { Users } from '../src/entity2/users'; -import { DatasetDTO, DimensionDTO, RevisionDTO } from '../src/dtos2/dataset-dto'; -import { ViewErrDTO } from '../src/dtos2/view-dto'; +import { Dataset } from '../src/entities/dataset'; +import { DatasetInfo } from '../src/entities/dataset_info'; +import { Revision } from '../src/entities/revision'; +import { Import } from '../src/entities/import'; +import { CsvInfo } from '../src/entities/csv_info'; +import { Source } from '../src/entities/source'; +import { Dimension } from '../src/entities/dimension'; +import { DimensionType } from '../src/entities/dimension_type'; +import { DimensionInfo } from '../src/entities/dimension_info'; +import { User } from '../src/entities/user'; +import { DatasetDTO, DimensionDTO, RevisionDTO } from '../src/dtos/dataset-dto'; +import { ViewErrDTO } from '../src/dtos/view-dto'; import { MAX_PAGE_SIZE, MIN_PAGE_SIZE } from '../src/controllers/csv-processor'; import { datasourceOptions } from './test-data-source'; @@ -40,12 +40,12 @@ describe('API Endpoints', () => { beforeAll(async () => { await databaseManager(datasourceOptions); await dbManager.initializeDataSource(); - const user = Users.getTestUser(); + const user = User.getTestUser(); await user.save(); // First create a dataset const dataset1 = new Dataset(); dataset1.id = dataset1Id; - dataset1.created_by = Promise.resolve(user); + dataset1.createdBy = Promise.resolve(user); dataset1.live = new Date(Date.now()); // Give it some info const datasetInfo1 = new DatasetInfo(); @@ -58,8 +58,8 @@ describe('API Endpoints', () => { const revision1 = new Revision(); revision1.id = revision1Id; revision1.dataset = Promise.resolve(dataset1); - revision1.created_by = Promise.resolve(user); - revision1.revision_index = 1; + revision1.createdBy = Promise.resolve(user); + revision1.revisionIndex = 1; dataset1.revisions = Promise.resolve([revision1]); // Attach an import e.g. 
a file to the revision const import1 = new Import(); @@ -88,32 +88,32 @@ describe('API Endpoints', () => { source1.id = '304574E6-8DD0-4654-BE67-FA055C9F7C81'; source1.import = Promise.resolve(import1); source1.revision = Promise.resolve(revision1); - source1.csv_field = 'ID'; - source1.column_index = 0; + source1.csvField = 'ID'; + source1.columnIndex = 0; source1.action = 'ignore'; sources.push(source1); const source2 = new Source(); source2.id = 'D3D3D3D3-8DD0-4654-BE67-FA055C9F7C81'; source2.import = Promise.resolve(import1); source2.revision = Promise.resolve(revision1); - source2.csv_field = 'Text'; - source2.column_index = 1; + source2.csvField = 'Text'; + source2.columnIndex = 1; source2.action = 'create'; sources.push(source2); const source3 = new Source(); source3.id = 'D62FA390-9AB2-496E-A6CA-0C0E2FCF206E'; source3.import = Promise.resolve(import1); source3.revision = Promise.resolve(revision1); - source3.csv_field = 'Number'; - source3.column_index = 2; + source3.csvField = 'Number'; + source3.columnIndex = 2; source3.action = 'create'; sources.push(source3); const source4 = new Source(); source4.id = 'FB25D668-54F2-44EF-99FE-B4EDC4AF2911'; source4.import = Promise.resolve(import1); source4.revision = Promise.resolve(revision1); - source4.csv_field = 'Date'; - source4.column_index = 3; + source4.csvField = 'Date'; + source4.columnIndex = 3; source4.action = 'create'; sources.push(source4); import1.sources = Promise.resolve(sources); @@ -123,7 +123,7 @@ describe('API Endpoints', () => { const dimension1 = new Dimension(); dimension1.id = dimension1Id; dimension1.dataset = Promise.resolve(dataset1); - dimension1.start_revision = Promise.resolve(revision1); + dimension1.startRevision = Promise.resolve(revision1); dimension1.type = DimensionType.RAW; const dimension1Info = new DimensionInfo(); dimension1Info.dimension = Promise.resolve(dimension1); @@ -138,7 +138,7 @@ describe('API Endpoints', () => { const dimension2 = new Dimension(); dimension2.id = '61D51F82-0771-4C90-849E-55FFA7A4D802'; dimension2.dataset = Promise.resolve(dataset1); - dimension2.start_revision = Promise.resolve(revision1); + dimension2.startRevision = Promise.resolve(revision1); dimension2.type = DimensionType.TEXT; const dimension2Info = new DimensionInfo(); dimension2Info.dimension = Promise.resolve(dimension2); @@ -153,7 +153,7 @@ describe('API Endpoints', () => { const dimension3 = new Dimension(); dimension3.id = 'F4D5B0F4-180E-4020-AAD5-9300B673D92B'; dimension3.dataset = Promise.resolve(dataset1); - dimension3.start_revision = Promise.resolve(revision1); + dimension3.startRevision = Promise.resolve(revision1); dimension3.type = DimensionType.NUMERIC; const dimension3Info = new DimensionInfo(); dimension3Info.dimension = Promise.resolve(dimension3); @@ -168,7 +168,7 @@ describe('API Endpoints', () => { const dimension4 = new Dimension(); dimension4.id = 'C24962F4-F395-40EF-B4DD-270E90E10972'; dimension4.dataset = Promise.resolve(dataset1); - dimension4.start_revision = Promise.resolve(revision1); + dimension4.startRevision = Promise.resolve(revision1); dimension4.type = DimensionType.TIME_POINT; const dimension4Info = new DimensionInfo(); dimension4Info.dimension = Promise.resolve(dimension4); @@ -428,7 +428,24 @@ describe('API Endpoints', () => { }); }); - test('Get file rertunrs 200 and complete file data', async () => { + test('Get file from a dataset rertunrs 200 and complete file data', async () => { + const testFile2 = path.resolve(__dirname, `./test-data-2.csv`); + const testFile1Buffer = 
fs.readFileSync(testFile2);
+        BlobStorageService.prototype.readFile = jest.fn().mockReturnValue(testFile1Buffer.toString());
+
+        const res = await request(app)
+            .get(`/en-GB/dataset/${dataset1Id}/view`)
+            .query({ page_number: 2, page_size: 100 });
+        expect(res.status).toBe(200);
+        expect(res.body.current_page).toBe(2);
+        expect(res.body.total_pages).toBe(6);
+        expect(res.body.page_size).toBe(100);
+        expect(res.body.headers).toEqual(['ID', 'Text', 'Number', 'Date']);
+        expect(res.body.data[0]).toEqual(['101', 'GEYiRzLIFM', '774477', '2002-03-13']);
+        expect(res.body.data[99]).toEqual(['200', 'QhBxdmrUPb', '3256099', '2026-12-17']);
+    });
+
+    test('Get file from a revision and import returns 200 and complete file data', async () => {
         const testFile2 = path.resolve(__dirname, `./test-data-2.csv`);
         const testFileStream = fs.createReadStream(testFile2);
         const testFile2Buffer = fs.readFileSync(testFile2);
diff --git a/test/test-data-source.ts b/test/test-data-source.ts
index ff02af2..fcb1181 100644
--- a/test/test-data-source.ts
+++ b/test/test-data-source.ts
@@ -2,15 +2,15 @@ import 'reflect-metadata';
 import { DataSourceOptions } from 'typeorm';
 import * as dotenv from 'dotenv';
 
-import { Dataset } from '../src/entity2/dataset';
-import { DatasetInfo } from '../src/entity2/dataset_info';
-import { Revision } from '../src/entity2/revision';
-import { Import } from '../src/entity2/import';
-import { CsvInfo } from '../src/entity2/csv_info';
-import { Source } from '../src/entity2/source';
-import { Dimension } from '../src/entity2/dimension';
-import { DimensionInfo } from '../src/entity2/dimension_info';
-import { Users } from '../src/entity2/users';
+import { Dataset } from '../src/entities/dataset';
+import { DatasetInfo } from '../src/entities/dataset_info';
+import { Revision } from '../src/entities/revision';
+import { Import } from '../src/entities/import';
+import { CsvInfo } from '../src/entities/csv_info';
+import { Source } from '../src/entities/source';
+import { Dimension } from '../src/entities/dimension';
+import { DimensionInfo } from '../src/entities/dimension_info';
+import { User } from '../src/entities/user';
 
 dotenv.config();
 
@@ -20,6 +20,6 @@ export const datasourceOptions: DataSourceOptions = {
     database: ':memory:',
     synchronize: true,
     logging: false,
-    entities: [Dataset, DatasetInfo, Revision, Import, CsvInfo, Source, Dimension, DimensionInfo, Users],
+    entities: [Dataset, DatasetInfo, Revision, Import, CsvInfo, Source, Dimension, DimensionInfo, User],
     subscribers: []
 };

From 29db52e0a563b66fcde93017bd8cc364c963039c Mon Sep 17 00:00:00 2001
From: Jamie Maynard 
Date: Fri, 6 Sep 2024 13:32:09 +0100
Subject: [PATCH 5/5] Refactor timestamp columns to timestamptz

---
 src/controllers/csv-processor.ts         |  2 +-
 src/entities/dataset.ts                  |  4 ++--
 src/entities/revision.ts                 | 12 ++++++++++--
 src/entities/user.ts                     | 13 +++++++------
 src/migration/1723729297617-migration.ts | 20 ++++++++++----------
 5 files changed, 30 insertions(+), 21 deletions(-)

diff --git a/src/controllers/csv-processor.ts b/src/controllers/csv-processor.ts
index 453912a..f4e4a44 100644
--- a/src/controllers/csv-processor.ts
+++ b/src/controllers/csv-processor.ts
@@ -119,7 +119,7 @@ function validateParams(page_number: number, max_page_number: number, page_size:
 export const uploadCSVToBlobStorage = async (fileStream: Readable, filetype: string): Promise => {
     const blobStorageService = new BlobStorageService();
-    if(!fileStream) {
+    if (!fileStream) {
         logger.error('No buffer to upload to blob storage');
         throw new Error('No 
     }
 
diff --git a/src/entities/dataset.ts b/src/entities/dataset.ts
index 132e3d1..3008c98 100644
--- a/src/entities/dataset.ts
+++ b/src/entities/dataset.ts
@@ -29,10 +29,10 @@ export class Dataset extends BaseEntity {
     @JoinColumn({ name: 'created_by' })
     createdBy: Promise<User>;
 
-    @Column({ type: process.env.NODE_ENV === 'test' ? 'datetime' : 'timestamp', nullable: true })
+    @Column({ type: process.env.NODE_ENV === 'test' ? 'datetime' : 'timestamptz', nullable: true })
     live: Date;
 
-    @Column({ type: process.env.NODE_ENV === 'test' ? 'datetime' : 'timestamp', nullable: true })
+    @Column({ type: process.env.NODE_ENV === 'test' ? 'datetime' : 'timestamptz', nullable: true })
     archive: Date;
 
     @OneToMany(() => DatasetInfo, (datasetInfo) => datasetInfo.dataset, {
diff --git a/src/entities/revision.ts b/src/entities/revision.ts
index 3265da5..93de270 100644
--- a/src/entities/revision.ts
+++ b/src/entities/revision.ts
@@ -58,10 +58,18 @@ export class Revision extends BaseEntity implements RevisionInterface {
     @Column({ name: 'online_cube_filename', type: 'varchar', length: 255, nullable: true })
     onlineCubeFilename: string;
 
-    @Column({ name: 'publish_date', type: process.env.NODE_ENV === 'test' ? 'datetime' : 'timestamp', nullable: true })
+    @Column({
+        name: 'publish_date',
+        type: process.env.NODE_ENV === 'test' ? 'datetime' : 'timestamptz',
+        nullable: true
+    })
     publishDate: Date;
 
-    @Column({ name: 'approval_date', type: process.env.NODE_ENV === 'test' ? 'datetime' : 'timestamp', nullable: true })
+    @Column({
+        name: 'approval_date',
+        type: process.env.NODE_ENV === 'test' ? 'datetime' : 'timestamptz',
+        nullable: true
+    })
     approvalDate: Date;
 
     @OneToMany(() => Import, (importEntity) => importEntity.revision, {
diff --git a/src/entities/user.ts b/src/entities/user.ts
index 43141d6..ce9b47c 100644
--- a/src/entities/user.ts
+++ b/src/entities/user.ts
@@ -12,20 +12,21 @@ export class User extends BaseEntity {
     name: string;
 
     @Column({ nullable: true })
-    given_name: string;
+    givenName: string;
 
     @Column({ nullable: true })
-    last_name: string;
+    lastName: string;
 
     @Column({
         name: 'created_at',
-        type: process.env.NODE_ENV === 'test' ? 'datetime' : 'timestamp', default: () => 'CURRENT_TIMESTAMP'
+        type: process.env.NODE_ENV === 'test' ? 'datetime' : 'timestamptz',
+        default: () => 'CURRENT_TIMESTAMP'
     })
     createdAt: Date;
 
     @Column({
         name: 'updated_at',
-        type: process.env.NODE_ENV === 'test' ? 'datetime' : 'timestamp',
+        type: process.env.NODE_ENV === 'test' ?
'datetime' : 'timestamptz', default: () => 'CURRENT_TIMESTAMP', onUpdate: 'CURRENT_TIMESTAMP' }) @@ -39,8 +40,8 @@ export class User extends BaseEntity { user.id = '12345678-1234-1234-1234-123456789012'; user.email = 'test@test.com'; user.name = 'Test User'; - user.given_name = 'Test'; - user.last_name = 'User'; + user.givenName = 'Test'; + user.lastName = 'User'; user.createdAt = new Date(); user.updatedAt = new Date(); user.active = true; diff --git a/src/migration/1723729297617-migration.ts b/src/migration/1723729297617-migration.ts index 5abd7d4..b0f0c97 100644 --- a/src/migration/1723729297617-migration.ts +++ b/src/migration/1723729297617-migration.ts @@ -11,22 +11,22 @@ export class Migration1723729297617 implements MigrationInterface { access_token TEXT, refresh_token TEXT, id_token TEXT, - token_expiry TIMESTAMP, + token_expiry TIMESTAMPTZ, name VARCHAR(255), given_name VARCHAR(255), last_name VARCHAR(255), profile_picture VARCHAR(255), - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP, active BOOLEAN NOT NULL DEFAULT true ); CREATE TABLE dataset ( id UUID DEFAULT gen_random_uuid() PRIMARY KEY, - creation_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + creation_date TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP, created_by UUID, - live TIMESTAMP, - archive TIMESTAMP, + live TIMESTAMPTZ, + archive TIMESTAMPTZ, FOREIGN KEY (created_by) REFERENCES users(id) ); @@ -43,11 +43,11 @@ export class Migration1723729297617 implements MigrationInterface { id UUID DEFAULT gen_random_uuid() PRIMARY KEY, revision_index INT, dataset_id UUID, - creation_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + creation_date TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP, previous_revision_id UUID, online_cube_filename VARCHAR(255), - publish_date TIMESTAMP, - approval_date TIMESTAMP, + publish_date TIMESTAMPTZ, + approval_date TIMESTAMPTZ, approved_by UUID, created_by UUID, FOREIGN KEY (dataset_id) REFERENCES dataset(id) ON DELETE CASCADE, @@ -89,7 +89,7 @@ export class Migration1723729297617 implements MigrationInterface { mime_type VARCHAR(255), filename VARCHAR(255), hash VARCHAR(255), - uploaded_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + uploaded_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP, type import_type NOT NULL, location location_type NOT NULL, FOREIGN KEY (revision_id) REFERENCES revision(id) ON DELETE CASCADE