diff --git a/web/src/core/usecases/dataExplorer/selectors.ts b/web/src/core/usecases/dataExplorer/selectors.ts index 3895f487e..12340ef8d 100644 --- a/web/src/core/usecases/dataExplorer/selectors.ts +++ b/web/src/core/usecases/dataExplorer/selectors.ts @@ -42,40 +42,39 @@ const main = createSelector(state, columns, (state, columns) => { const { isQuerying, queryParams, errorMessage, data, extraRestorableStates } = state; if (errorMessage !== undefined) { - return { isQuerying, errorMessage: errorMessage }; + return { isQuerying, errorMessage: errorMessage, queryParams }; } - if (data.state === "empty") { - return { - isQuerying, - rows: undefined - }; - } + switch (data.state) { + case "empty": + return { + isQuerying, + rows: undefined + }; + case "unknownFileType": + return { isQuerying, queryParams, shouldAskFileType: true }; + case "loaded": { + assert(columns !== undefined); + assert(queryParams !== undefined); + assert(queryParams.rowsPerPage !== undefined); + assert(queryParams.page !== undefined); + assert(extraRestorableStates !== undefined); - if (data.state === "unknownFileType") { - return { isQuerying, queryParams, shouldAskFileType: true }; + const { rowsPerPage, page } = queryParams; + return { + isQuerying, + rows: data.rows.map((row, i) => ({ + id: i + rowsPerPage * (page - 1), + ...row + })), + rowCount: data.rowCount, + queryParams, + extraRestorableStates, + fileDownloadUrl: data.fileDownloadUrl, + columns + }; + } } - - assert(columns !== undefined); - assert(queryParams !== undefined); - assert(queryParams.rowsPerPage !== undefined); - assert(queryParams.page !== undefined); - assert(extraRestorableStates !== undefined); - - const { rowsPerPage, page } = queryParams; - - return { - isQuerying, - rows: data.rows.map((row, i) => ({ - id: i + rowsPerPage * (page - 1), - ...row - })), - rowCount: data.rowCount, - queryParams, - extraRestorableStates, - fileDownloadUrl: data.fileDownloadUrl, - columns - }; }); export const selectors = { main }; diff --git a/web/src/core/usecases/dataExplorer/state.ts b/web/src/core/usecases/dataExplorer/state.ts index d520fedbb..c4e96e484 100644 --- a/web/src/core/usecases/dataExplorer/state.ts +++ b/web/src/core/usecases/dataExplorer/state.ts @@ -42,21 +42,6 @@ export const { actions, reducer } = createUsecaseActions({ data: { state: "empty" } }), reducers: { - queryStarted: ( - state, - { - payload - }: { - payload: { - queryParams: NonNullable; - }; - } - ) => { - const { queryParams } = payload; - state.errorMessage = undefined; - state.isQuerying = true; - state.queryParams = queryParams; - }, extraRestorableStateSet: ( state, { @@ -98,7 +83,21 @@ export const { actions, reducer } = createUsecaseActions({ assert(state.extraRestorableStates !== undefined); state.extraRestorableStates.columnVisibility = columnVisibility; }, - + queryStarted: ( + state, + { + payload + }: { + payload: { + queryParams: NonNullable; + }; + } + ) => { + const { queryParams } = payload; + state.errorMessage = undefined; + state.isQuerying = true; + state.queryParams = queryParams; + }, querySucceeded: ( state, { @@ -115,6 +114,10 @@ export const { actions, reducer } = createUsecaseActions({ const { rowCount, rows, fileDownloadUrl, fileType } = payload; state.isQuerying = false; state.data = { state: "loaded", rowCount, rows, fileDownloadUrl, fileType }; + state.extraRestorableStates = { + selectedRowIndex: undefined, + columnVisibility: {} + }; }, //Rename this, i want to end query because not able to auto detect fileType terminateQueryDueToUnknownFileType: ( @@ -143,7 +146,6 @@ export const { actions, reducer } = createUsecaseActions({ const { errorMessage } = payload; state.isQuerying = false; state.errorMessage = errorMessage; - state.queryParams = undefined; }, restoreState: state => { state.queryParams = undefined; diff --git a/web/src/core/usecases/dataExplorer/thunks.ts b/web/src/core/usecases/dataExplorer/thunks.ts index 41880e023..319755c13 100644 --- a/web/src/core/usecases/dataExplorer/thunks.ts +++ b/web/src/core/usecases/dataExplorer/thunks.ts @@ -83,7 +83,11 @@ const privateThunks = { fileDownloadUrl: data.fileDownloadUrl }; } - return dispatch(privateThunks.detectFileType({ sourceUrl })); + const toto = await dispatch( + privateThunks.detectFileType({ sourceUrl }) + ); + console.log(toto); + return toto; })(); if (fileType === undefined) { @@ -202,57 +206,97 @@ const privateThunks = { privateThunks.getFileDonwloadUrl({ sourceUrl }) ); - const contentType = await (async () => { - try { - const response = await fetch(fileDownloadUrl, { method: "HEAD" }); + try { + const response = await fetch(fileDownloadUrl, { + method: "GET", + headers: { Range: "bytes=0-15" } // Fetch the first 16 bytes + }); + + if (!response.ok) { + return { fileType: undefined, fileDownloadUrl }; + } + + const contentType = response.headers.get("Content-Type"); - if (!response.ok) { + const filTypeExtractdByContentType = (() => { + if (!contentType) { return undefined; } - return response.headers.get("Content-Type") ?? undefined; - } catch (error) { - return undefined; - } - })(); + const contentTypeToExtension = [ + { + keyword: "application/parquet" as const, + extension: "parquet" as const + }, + { + keyword: "application/x-parquet" as const, + extension: "parquet" as const + }, + { keyword: "text/csv" as const, extension: "csv" as const }, + { + keyword: "application/csv" as const, + extension: "csv" as const + }, + { + keyword: "application/json" as const, + extension: "json" as const + }, + { keyword: "text/json" as const, extension: "json" as const } + ]; + + const match = contentTypeToExtension.find( + ({ keyword }) => contentType === keyword + ); + return match ? match.extension : undefined; + })(); - const contentTypeToExtension = [ - { - keyword: "application/parquet" as const, - extension: "parquet" as const - }, - { - keyword: "application/x-parquet" as const, - extension: "parquet" as const - }, - { keyword: "text/csv" as const, extension: "csv" as const }, - { keyword: "application/csv" as const, extension: "csv" as const }, - { keyword: "application/json" as const, extension: "json" as const }, - { keyword: "text/json" as const, extension: "json" as const } - ]; - - const getExtensionFromContentType = ( - contentType?: string - ): ValidFileType | undefined => { - if (!contentType) { - return undefined; + if (filTypeExtractdByContentType !== undefined) { + return { fileType: filTypeExtractdByContentType, fileDownloadUrl }; } - const match = contentTypeToExtension.find( - ({ keyword }) => contentType === keyword - ); - return match ? match.extension : undefined; - }; + const fileSignatures = [ + { + condition: (bytes: Uint8Array) => + bytes[0] === 80 && + bytes[1] === 65 && + bytes[2] === 82 && + bytes[3] === 49, // "PAR1" + extension: "parquet" as const + }, + { + condition: (bytes: Uint8Array) => [91, 123].includes(bytes[0]), // "[" or "{" + extension: "json" as const // JSON + }, + { + condition: (bytes: Uint8Array) => { + const fileContent = new TextDecoder().decode(bytes); + return ( + fileContent.includes(",") || + fileContent.includes("\n") || + fileContent.includes(";") + ); // CSV heuristic + }, + extension: "csv" as const + } + ]; - return { - fileType: getExtensionFromContentType(contentType), - fileDownloadUrl - }; - }, - /* - getParquetMetadata: (params: { sourceUrl: string }) => async () => {}, + const arrayBuffer = await response.arrayBuffer(); + const bytes = new Uint8Array(arrayBuffer); - */ + const match = fileSignatures.find(({ condition }) => condition(bytes)); + + if (match) { + return { fileType: match.extension, fileDownloadUrl }; + } + } catch (error) { + console.error("Failed to fetch file for type detection:", error); + //TODO: reject an error + return { fileType: undefined, fileDownloadUrl }; + } + + //Ask user to manualy specify the file type + return { fileType: undefined, fileDownloadUrl }; + }, updateDataSource: (params: { queryParams: { diff --git a/web/src/ui/pages/dataExplorer/DataExplorer.tsx b/web/src/ui/pages/dataExplorer/DataExplorer.tsx index 3505da6d7..8bf70db28 100644 --- a/web/src/ui/pages/dataExplorer/DataExplorer.tsx +++ b/web/src/ui/pages/dataExplorer/DataExplorer.tsx @@ -136,6 +136,7 @@ export default function DataExplorer(props: Props) {
{(() => { if (errorMessage !== undefined) { + console.log(queryParams); return ( {errorMessage}