Implement measure handling and fix the cube handler
This is pretty complete now other than tests. There has been some
refactoring work to remove duplicated code. The cube now handles measures
and data types properly. You can now upload a measure lookup table and
get a preview of it.
j-maynard committed Dec 25, 2024
1 parent 6a9e88c commit c6c8a40
Showing 14 changed files with 890 additions and 262 deletions.
86 changes: 63 additions & 23 deletions package-lock.json

Some generated files are not rendered by default.

3 changes: 3 additions & 0 deletions package.json
@@ -37,6 +37,7 @@
"@types/better-sqlite3": "^7.6.9",
"@types/cookie-parser": "^1.4.7",
"@types/cors": "^2.8.17",
"@types/detect-character-encoding": "^0.7.2",
"@types/express": ">=4.17.21",
"@types/express-session": "^1.18.0",
"@types/fs-extra": "^9.0.13",
@@ -82,6 +83,7 @@
"cookie-parser": "^1.4.6",
"csv": "^6.3.8",
"date-fns": "^4.1.0",
"detect-character-encoding": "^0.9.0",
"dotenv": "^16.4.4",
"duckdb-async": "^1.1.3",
"express": ">=4.19.2",
@@ -91,6 +93,7 @@
"i18next": "^23.10.1",
"i18next-fs-backend": "^2.3.1",
"i18next-http-middleware": "^3.5.0",
"iconv-lite": "^0.6.3",
"jsonwebtoken": "^9.0.2",
"lodash": "^4.17.21",
"multer": "^1.4.5-lts.1",
26 changes: 22 additions & 4 deletions src/controllers/csv-processor.ts
@@ -2,7 +2,9 @@ import { createHash, randomUUID } from 'node:crypto';
import fs from 'fs';

import { Database, TableData } from 'duckdb-async';
import tmp from 'tmp';
import tmp, { file } from 'tmp';
import detectCharacterEncoding from 'detect-character-encoding';
import iconv from 'iconv-lite';

import { i18next } from '../middleware/translation';
import { logger as parentLogger } from '../utils/logger';
@@ -155,6 +157,20 @@ export async function extractTableInformation(fileBuffer: Buffer, fileType: File
});
}

function convertBufferToUTF8(buffer: Buffer): Buffer {
const fileEncoding = detectCharacterEncoding(buffer)?.encoding;
if (!fileEncoding) {
logger.warn('Could not detect file encoding for the file');
throw new Error('errors.csv.invalid');
}
if (fileEncoding !== 'UTF-8') {
logger.warn(`File is not UTF-8 encoded... File appears to be ${fileEncoding}... Going to try to recode it`);
const decodedString = iconv.decode(buffer, fileEncoding);
return iconv.encode(decodedString, 'utf-8');
}
return buffer;
}
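
For illustration only (not part of this commit): a minimal sketch of the detect-then-recode approach used by convertBufferToUTF8 above, using the detect-character-encoding and iconv-lite modules imported in this file. The sample string and source encoding are assumptions chosen for the example.

import detectCharacterEncoding from 'detect-character-encoding';
import iconv from 'iconv-lite';

// Build a non-UTF-8 buffer to stand in for a user-uploaded CSV (illustrative only).
const latin1Csv = iconv.encode('name,note\nrow,café\n', 'ISO-8859-1');

// Detect the encoding; the result may be null, hence the optional chaining used in the diff.
const detected = detectCharacterEncoding(latin1Csv)?.encoding;

// Re-encode to UTF-8 only when the detected encoding differs, mirroring the function above.
const utf8Csv =
    detected && detected !== 'UTF-8' ? iconv.encode(iconv.decode(latin1Csv, detected), 'utf-8') : latin1Csv;

console.log(utf8Csv.toString('utf-8'));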

// Required Methods for refactor
export const uploadCSV = async (
fileBuffer: Buffer,
@@ -167,6 +183,7 @@
logger.error('No buffer to upload to blob storage');
throw new Error('No buffer to upload to blob storage');
}
let uploadBuffer = fileBuffer;
const factTable = new FactTable();
factTable.id = randomUUID().toLowerCase();
factTable.mimeType = filetype;
@@ -180,6 +197,7 @@
factTable.delimiter = ',';
factTable.quote = '"';
factTable.linebreak = '\n';
uploadBuffer = convertBufferToUTF8(fileBuffer);
break;
case 'application/vnd.apache.parquet':
case 'application/parquet':
@@ -225,7 +243,7 @@
}
let factTableDescriptions: FactTableInfo[];
try {
factTableDescriptions = await extractTableInformation(fileBuffer, factTable.fileType);
factTableDescriptions = await extractTableInformation(uploadBuffer, factTable.fileType);
} catch (error) {
logger.error(`Something went wrong trying to read the users file with the following error: ${error}`);
throw error;
@@ -234,10 +252,10 @@
factTable.filename = `${factTable.id}.${extension}`;
factTable.action = FactTableAction.ReplaceAll;
const hash = createHash('sha256');
hash.update(fileBuffer);
hash.update(uploadBuffer);
try {
await dataLakeService.createDirectory(datasetId);
await dataLakeService.uploadFileBuffer(factTable.filename, datasetId, fileBuffer);
await dataLakeService.uploadFileBuffer(factTable.filename, datasetId, uploadBuffer);
} catch (err) {
logger.error(
`Something went wrong trying to upload the file to the Data Lake with the following error: ${err}`
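For illustration only (not part of this commit): the hunk above hashes and uploads uploadBuffer (the UTF-8-normalised copy) rather than the raw fileBuffer, so the stored hash matches the bytes actually written to the data lake. A minimal sketch of that hashing step follows; the sample buffer is illustrative, and the digest('hex') call is an assumption about how the hash value would be read out.

import { createHash } from 'node:crypto';

// Illustrative buffer standing in for the recoded upload.
const uploadBuffer = Buffer.from('measure,description\n1,Count\n', 'utf-8');

// Hash the same bytes that get uploaded, as the diff does.
const hash = createHash('sha256');
hash.update(uploadBuffer);
console.log(hash.digest('hex'));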
