Commit

Merge pull request #5 from Marvell-Consulting/40-242-upload-dataset-journey

Implement Database Layer for Beta
easternbloc authored Jun 7, 2024
2 parents c04ce19 + 3389872 commit 6c9bb83
Showing 34 changed files with 1,065 additions and 410 deletions.
2 changes: 1 addition & 1 deletion package.json
@@ -17,7 +17,7 @@
"dev": "nodemon --watch src -e ts,ejs --exec npm run dev:start",
"start": "npm run build && node dist/server.js",
"start:container": "node dist/server.js",
"typeorm": "ts-node -r tsconfig-paths/register ./node_modules/typeorm/cli.js",
"typeorm": "typeorm-ts-node-commonjs",
"migration": "npm run typeorm migration:run"
},
"keywords": [],
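Note: typeorm-ts-node-commonjs is the CLI wrapper that ships with TypeORM 0.3+, so migrations run against TypeScript sources without the manual ts-node invocation the old script used. A hypothetical run via these scripts (the -d flag and the data source path are assumptions, not shown in this diff):

    # sketch only: TypeORM 0.3 migration commands expect a data source argument
    npm run typeorm -- migration:run -d src/data-source.ts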
17 changes: 9 additions & 8 deletions src/app.ts
@@ -1,21 +1,27 @@
/* eslint-disable import/no-cycle */
import 'reflect-metadata';

-import pino from 'pino';
+import pino, { Logger } from 'pino';
import express, { Application, Request, Response } from 'express';
import i18next from 'i18next';
import Backend from 'i18next-fs-backend';
import i18nextMiddleware from 'i18next-http-middleware';
import { DataSourceOptions } from 'typeorm';

-import { apiRoute } from './route/dataset';
+import { apiRoute } from './route/dataset-route';
import { healthcheck } from './route/healthcheck';
import DatabaseManager from './database-manager';

// eslint-disable-next-line import/no-mutable-exports
export let dbManager: DatabaseManager;

+export const logger: Logger = pino({
+name: 'StatsWales-Alpha-App',
+level: 'debug'
+});

export const connectToDb = async (datasourceOptions: DataSourceOptions) => {
-dbManager = new DatabaseManager(datasourceOptions);
+dbManager = new DatabaseManager(datasourceOptions, logger);
await dbManager.initializeDataSource();
};

@@ -39,11 +45,6 @@ i18next

const app: Application = express();

-export const logger = pino({
-name: 'StatsWales-Alpha-App',
-level: 'debug'
-});

app.use(i18nextMiddleware.handle(i18next));
app.use('/:lang/dataset', apiRoute);
app.use('/:lang/healthcheck', healthcheck);
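With the logger now declared before connectToDb, it can be injected into DatabaseManager at startup. A minimal bootstrap sketch, assuming app is exported from app.ts and datasourceOptions from data-source.ts (the filename and port are assumptions):

    // hypothetical server bootstrap
    import app, { connectToDb, logger } from './app';
    import { datasourceOptions } from './data-source';

    connectToDb(datasourceOptions)
        .then(() => app.listen(3000, () => logger.info('Listening on port 3000')))
        .catch((err) => logger.error(err));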
133 changes: 49 additions & 84 deletions src/controllers/csv-processor.ts
@@ -1,9 +1,15 @@
/* eslint-disable import/no-cycle */
import { createHash } from 'crypto';

import { parse } from 'csv';
import pino from 'pino';

-import { ProcessedCSV } from '../models/processedcsv';
+import { UploadDTO, UploadErrDTO } from '../dtos/upload-dto';
import { Error } from '../models/error';
-import { Datafile } from '../entity/Datafile';
+import { Datafile } from '../entity/datafile';
+import { Dataset } from '../entity/dataset';
+import { ViewDTO, ViewErrDTO } from '../dtos/view-dto';
+import { datasetToDatasetDTO } from '../dtos/dataset-dto';

import { DataLakeService } from './datalake';

@@ -56,41 +62,26 @@ function validateParams(page_number: number, max_page_number: number, page_size:
return errors;
}

-export const uploadCSV = async (buff: Buffer, datafile: Datafile): Promise<ProcessedCSV> => {
-const dataLateService = new DataLakeService();
+export const uploadCSV = async (buff: Buffer, dataset: Dataset): Promise<UploadDTO | UploadErrDTO> => {
+const dataLakeService = new DataLakeService();
const hash = createHash('sha256').update(buff).digest('hex');
+const datafile = Datafile.createDatafile(dataset, hash.toString(), 'BetaUser');
+const savedDataFile = await datafile.save();
+const dto = await datasetToDatasetDTO(dataset);
if (buff) {
try {
-logger.debug(`Uploading file ${datafile} to datalake`);
-await dataLateService.uploadFile(`${datafile.id}.csv`, buff);
+logger.debug(`Uploading file ${savedDataFile.id} to datalake`);
+await dataLakeService.uploadFile(`${savedDataFile.id}.csv`, buff);
return {
success: true,
-datafile_id: datafile.id,
-datafile_name: datafile.name,
-datafile_description: datafile.description,
-page_size: undefined,
-page_info: undefined,
-pages: undefined,
-current_page: undefined,
-total_pages: undefined,
-headers: undefined,
-data: undefined,
-errors: undefined
+dataset: dto
};
} catch (err) {
logger.error(err);
datafile.remove();
return {
success: false,
-datafile_id: undefined,
-datafile_name: undefined,
-datafile_description: undefined,
-page_size: undefined,
-page_info: undefined,
-pages: undefined,
-current_page: undefined,
-total_pages: undefined,
-headers: undefined,
-data: undefined,
+dataset: dto,
errors: [{ field: 'csv', message: 'Error uploading file to datalake' }]
};
}
@@ -99,16 +90,7 @@ export const uploadCSV = async (buff: Buffer, datafile: Datafile): Promise<Proce
datafile.remove();
return {
success: false,
-datafile_id: datafile.id,
-datafile_name: undefined,
-datafile_description: undefined,
-page_size: undefined,
-page_info: undefined,
-pages: undefined,
-current_page: undefined,
-total_pages: undefined,
-headers: undefined,
-data: undefined,
+dataset: dto,
errors: [{ field: 'csv', message: 'No CSV data available' }]
};
}
@@ -124,10 +106,30 @@ function setupPagination(page: number, total_pages: number): Array<string | numb
return pages;
}

-export const processCSV = async (filename: string, page: number, size: number): Promise<ProcessedCSV> => {
-const dataLateService = new DataLakeService();
+export const processCSV = async (dataset: Dataset, page: number, size: number): Promise<ViewErrDTO | ViewDTO> => {
+const datalakeService = new DataLakeService();
try {
-const buff = await dataLateService.downloadFile(`${filename}.csv`);
+const datafiles = await dataset.datafiles;
+const datafile: Datafile | undefined = datafiles
+.sort(
+(first: Datafile, second: Datafile) =>
+new Date(second.creationDate).getTime() - new Date(first.creationDate).getTime()
+)
+.shift();
+if (datafile === undefined || datafile === null) {
+return {
+success: false,
+errors: [
+{
+field: 'dataset',
+message: 'No datafile attached to Dataset'
+}
+],
+dataset_id: dataset.id
+};
+}
+const buff = await datalakeService.downloadFile(`${datafile.id}.csv`);

const dataArray: Array<Array<string>> = (await parse(buff, {
delimiter: ','
}).toArray()) as string[][];
@@ -137,34 +139,8 @@ export const processCSV = async (filename: string, page: number, size: number):
if (errors.length > 0) {
return {
success: false,
-datafile_id: filename,
-datafile_name: undefined,
-datafile_description: undefined,
-page_size: undefined,
-page_info: undefined,
-pages: undefined,
-current_page: undefined,
-total_pages: undefined,
-headers: undefined,
-data: undefined,
-errors
};
}
-const datafile = await Datafile.findOneBy({ id: filename });
-if (datafile === null) {
-return {
-success: false,
-datafile_id: filename,
-datafile_name: undefined,
-datafile_description: undefined,
-page_size: undefined,
-page_info: undefined,
-pages: undefined,
-current_page: undefined,
-total_pages: undefined,
-headers: undefined,
-data: undefined,
-errors: [{ field: 'csv', message: 'unable to find datafile in database' }]
+errors,
+dataset_id: dataset.id
};
}

@@ -179,11 +155,10 @@ export const processCSV = async (filename: string, page: number, size: number):
return page * size;
}
};
+const dto = await datasetToDatasetDTO(dataset);
return {
success: true,
-datafile_id: filename,
-datafile_name: datafile.name,
-datafile_description: datafile.description,
+dataset: dto,
current_page: page,
page_info: {
total_records: dataArray.length,
@@ -194,24 +169,14 @@ export const processCSV = async (filename: string, page: number, size: number):
page_size: size,
total_pages,
headers: csvheaders,
-data: csvdata,
-errors: undefined
+data: csvdata
};
} catch (err) {
logger.error(err);
return {
success: false,
-datafile_id: filename,
-datafile_name: undefined,
-datafile_description: undefined,
-page_size: undefined,
-page_info: undefined,
-pages: undefined,
-current_page: undefined,
-total_pages: undefined,
-headers: undefined,
-data: undefined,
-errors: [{ field: 'csv', message: 'Error downloading file from datalake' }]
+errors: [{ field: 'csv', message: 'Error downloading file from datalake' }],
+dataset_id: dataset.id
};
}
};
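Both functions now return a union discriminated on success, so callers can branch on one flag instead of probing undefined fields. A sketch of a hypothetical Express route handler (the route wiring, parameter names, and status code are assumptions, not part of this diff):

    // hypothetical route handler using the new return types
    import { Request, Response } from 'express';
    import { Dataset } from '../entity/dataset';
    import { processCSV } from './csv-processor';

    export const viewDataset = async (req: Request, res: Response) => {
        const dataset = await Dataset.findOneByOrFail({ id: req.params.dataset_id });
        const result = await processCSV(dataset, Number(req.query.page) || 1, Number(req.query.size) || 100);
        if (result.success) {
            res.json(result); // ViewDTO: dataset DTO plus paged CSV rows
        } else {
            res.status(400).json(result); // ViewErrDTO: errors plus dataset_id
        }
    };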
11 changes: 6 additions & 5 deletions src/data-source.ts
@@ -1,13 +1,13 @@
import 'reflect-metadata';
-import { DataSourceOptions } from 'typeorm';
+import { DataSource, DataSourceOptions } from 'typeorm';
import * as dotenv from 'dotenv';

-import { Datafile } from './entity/Datafile';

dotenv.config();

const { DB_HOST, DB_PORT, DB_USERNAME, DB_PASSWORD, DB_DATABASE } = process.env;

-console.log(DB_HOST);

export const datasourceOptions: DataSourceOptions = {
type: 'postgres',
host: DB_HOST,
Expand All @@ -18,8 +18,9 @@ export const datasourceOptions: DataSourceOptions = {
ssl: true,
synchronize: false,
logging: false,
-entities: [Datafile],
-// entities: [`${__dirname}/entity/*.ts`],
+entities: [`${__dirname}/entity/*.ts`],
migrations: [`${__dirname}/migration/*.ts`],
subscribers: []
};

+export const dataSource = new DataSource(datasourceOptions);
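Exporting a concrete DataSource (not just the options) is what the TypeORM 0.3 CLI looks for when pointed at this file, and it also enables standalone scripts. A minimal sketch, assuming the entity glob resolves under ts-node:

    // hypothetical one-off script against the exported data source
    import { dataSource } from './data-source';
    import { Dataset } from './entity/dataset';

    async function listDatasets() {
        await dataSource.initialize();
        const count = await dataSource.getRepository(Dataset).count();
        console.log(`${count} datasets`);
        await dataSource.destroy();
    }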
34 changes: 28 additions & 6 deletions src/database-manager.ts
@@ -1,32 +1,54 @@
+/* eslint-disable import/no-cycle */
import 'reflect-metadata';
-import { DataSource, DataSourceOptions } from 'typeorm';
+import { DataSource, DataSourceOptions, EntityManager } from 'typeorm';
+import { Logger } from 'pino';

-import { Datafile } from './entity/Datafile';
+import { Dataset } from './entity/dataset';
+import { Datafile } from './entity/datafile';
+import { LookupTable } from './entity/lookuptable';
+import { DatasetTitle } from './entity/dataset_title';
+import { DatasetColumn } from './entity/dataset_column';
+import { DatasetDescription } from './entity/dataset_description';
+import { ColumnTitle } from './entity/column_title';

class DatabaseManager {
private datasourceOptions: DataSourceOptions;
private dataSource: DataSource;
+private entityManager: EntityManager;
+private logger: Logger;

-constructor(private config: DataSourceOptions) {
+constructor(
+private config: DataSourceOptions,
+logger: Logger
+) {
this.datasourceOptions = config;
+this.logger = logger;
}

getDataSource() {
return this.dataSource;
}

+getEntityManager(): EntityManager {
+if (this.entityManager === undefined)
+Promise.resolve(this.initializeDataSource()).catch((error) => this.logger.error(error));
+return this.entityManager;
+}

async initializeDataSource() {
this.dataSource = new DataSource({
...this.datasourceOptions,
-entities: [Datafile]
+entities: [Dataset, Datafile, LookupTable, DatasetTitle, DatasetDescription, DatasetColumn, ColumnTitle]
});

await this.dataSource
.initialize()
.then(() => {
-console.log('Data source initialized');
+this.logger.info('Data source initialized');
})
-.catch((error) => console.log(error));
+.catch((error) => this.logger.error(error));

+this.entityManager = this.dataSource.createEntityManager();
}
}

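With the logger injected and the EntityManager cached after initialization, consumers can query through the manager rather than creating their own connection. A minimal usage sketch (the Dataset query is illustrative, not part of this diff):

    // hypothetical consumer of DatabaseManager
    import DatabaseManager from './database-manager';
    import { datasourceOptions } from './data-source';
    import { logger } from './app';
    import { Dataset } from './entity/dataset';

    async function boot() {
        const manager = new DatabaseManager(datasourceOptions, logger);
        await manager.initializeDataSource();
        const datasets = await manager.getEntityManager().find(Dataset);
        logger.info(`Loaded ${datasets.length} datasets`);
    }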
6 changes: 2 additions & 4 deletions src/models/datafile-dto.ts → src/dtos/datafile-dto.ts
@@ -1,9 +1,7 @@
export interface DatafileDTO {
id: string;
-name: string;
-description: string;
sha256hash: string;
created_by: string;
creation_date: Date;
csv_link: string;
xslx_link: string;
view_link: string;
}
