From ca18ec211ed23c58257b709fb5b517e57a77d0f9 Mon Sep 17 00:00:00 2001 From: ots22 Date: Wed, 9 Aug 2023 10:12:26 +0100 Subject: [PATCH] Automate datasource catalog check and show in frontend (#594) - modify the check script to emit JavaScript that can be loaded by the frontend (working around GitHub CORS) - run the check on a schedule and store the output as a GitHub release artifact - load and show the result in the frontend --- .github/workflows/check-catalogs.yml | 35 +++++++++++++ dev/check_datasets.py | 26 ++++++---- frontend/package-lock.json | 13 +++++ frontend/package.json | 1 + frontend/public/index.html | 5 +- frontend/src/App.css | 4 ++ frontend/src/table.js | 74 ++++++++++++++++++++++++++-- 7 files changed, 144 insertions(+), 14 deletions(-) create mode 100644 .github/workflows/check-catalogs.yml diff --git a/.github/workflows/check-catalogs.yml b/.github/workflows/check-catalogs.yml new file mode 100644 index 00000000..c2e6e863 --- /dev/null +++ b/.github/workflows/check-catalogs.yml @@ -0,0 +1,35 @@ +name: Check catalogs +on: + schedule: + - cron: '18 3 * * *' + +jobs: + check-catalogs: + runs-on: ubuntu-latest + permissions: + contents: write + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: '3.10' + - name: Upgrade pip + run: python -m pip install --upgrade pip + - name: Install Scivision + run: pip install -e . 
+ - name: Check data catalog + run: python ./dev/check_datasets.py + - name: Store artifact + uses: actions/upload-artifact@v3 + with: + name: check-datasets-report + path: check_datasets.csv + - name: Create Release + uses: ncipollo/release-action@v1 + with: + tag: catalog-checks-report-latest-release + name: Catalog checks report + commit: ${{ github.ref }} + allowUpdates: true + removeArtifacts: true + artifacts: check_datasets.js diff --git a/dev/check_datasets.py b/dev/check_datasets.py index 5f56c0c2..2f15aee9 100644 --- a/dev/check_datasets.py +++ b/dev/check_datasets.py @@ -5,10 +5,13 @@ ''' -import pandas as pd +import logging +import json + +from datetime import datetime + from scivision import default_catalog, load_dataset from tqdm import tqdm -import logging # Create Logger logger = logging.getLogger(__name__) @@ -24,7 +27,7 @@ # Load dataset catalog datasources_catalog = default_catalog.datasources.to_dataframe() # Load dataset using load_dataset and record response -rows = [] +rows = {} for index in tqdm(range(datasources_catalog.shape[0])): name = datasources_catalog.loc[index]['name'] print(f'\nValidating: {name}') @@ -39,17 +42,20 @@ check_result = "Fail" response = logger.error(e, exc_info=True) - new_row = { - 'dataset_name': datasources_catalog.loc[index]['name'], + row_data = { 'url': data_url, 'check_result': check_result, 'response': response, } - rows.append(new_row) + rows.update({datasources_catalog.loc[index]['name']: row_data}) -automated_checks_report = pd.DataFrame.from_dict(rows, orient='columns') -automated_checks_report.to_csv('check_datasets.csv', index=False) +automated_checks_report = { + "time": datetime.now().isoformat(), + "report": rows +} +automated_checks_report_json = json.dumps(automated_checks_report) -automated_checks_report = automated_checks_report.set_index('dataset_name') -automated_checks_report.to_json('check_datasets.json', orient="index") +with open('check_datasets.js', 'w') as f: + print('// This file was 
generated automatically by check_datasets.py', file=f) + print(f'var global_CheckDatasetReport = {automated_checks_report_json};', file=f) diff --git a/frontend/package-lock.json b/frontend/package-lock.json index 146720f0..7fb92197 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -27,6 +27,7 @@ "react-router-dom": "^6.3.0", "react-scripts": "^5.0.1", "react-showdown": "^2.3.1", + "react-use-scripts": "^2.0.3", "showdown": "^2.1.0", "styled-components": "^5.3.10", "web-vitals": "^2.1.4" @@ -15158,6 +15159,18 @@ "react-dom": ">=16.6.0" } }, + "node_modules/react-use-scripts": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/react-use-scripts/-/react-use-scripts-2.0.3.tgz", + "integrity": "sha512-LISd5Ec8WtDCo41/0a5g/5o5RhmHjSAnyOe3mO+rZhtn3O+KjZOx3CUSP4dQyEv0JBK4EnB39fRMuwDEXO7uVw==", + "engines": { + "node": ">=8", + "npm": ">=5" + }, + "peerDependencies": { + "react": "^17.0.2 || ^18" + } + }, "node_modules/read-cache": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/read-cache/-/read-cache-1.0.0.tgz", diff --git a/frontend/package.json b/frontend/package.json index ae5eea7f..565a6251 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -23,6 +23,7 @@ "react-router-dom": "^6.3.0", "react-scripts": "^5.0.1", "react-showdown": "^2.3.1", + "react-use-scripts": "^2.0.3", "showdown": "^2.1.0", "styled-components": "^5.3.10", "web-vitals": "^2.1.4" diff --git a/frontend/public/index.html b/frontend/public/index.html index 4144544d..9bc86e8f 100644 --- a/frontend/public/index.html +++ b/frontend/public/index.html @@ -23,7 +23,10 @@ Unlike "/favicon.ico" or "favicon.ico", "%PUBLIC_URL%/favicon.ico" will work correctly both with client-side routing and a non-root public URL. Learn how to configure a non-root public URL by running `npm run build`. 
- --> + --> + + + Scivision diff --git a/frontend/src/App.css b/frontend/src/App.css index b4102d92..ab1c21c1 100644 --- a/frontend/src/App.css +++ b/frontend/src/App.css @@ -36,3 +36,7 @@ div.gridtext { background-color: Lavender; } +.tooltip-available { + text-decoration-line: underline; + text-decoration-style: dotted; +} diff --git a/frontend/src/table.js b/frontend/src/table.js index 4c6a3d0d..f6bcd8ad 100644 --- a/frontend/src/table.js +++ b/frontend/src/table.js @@ -1,4 +1,6 @@ -import { React } from 'react'; +import { React, useState, useEffect } from 'react'; + +import useScript from "react-use-scripts"; import datasources from './data/datasources.json'; import models from './data/models.json'; @@ -92,6 +94,36 @@ export function ModelTable() { // Component: Datasources, table view // route: /datasources export function DatasourceTable() { + + const [datasourceChecksReport, setDatasourceChecksReport] = + useState(null); + + function datasourceCheckResult(name) { + if (datasourceChecksReport !== null) { + const report = datasourceChecksReport.report[name] + if (report !== undefined) { + return report.check_result; + } else { + return "Unknown"; + } + } else { + return "Unknown"; + } + } + + function datasourceCheckTime() { + if (datasourceChecksReport) { + var time = new Date(datasourceChecksReport.time); + return time.toUTCString(); + } else { + return "(never)"; + } + } + + function datasourceValidationTimeString() { + return `last run ${datasourceCheckTime()}`; + } + const columns = [ { name: 'Thumbnail', @@ -107,17 +139,53 @@ export function DatasourceTable() { selector: row => row.name, name: 'Name', sortable: true, - grow: 0.3 + grow: 0.5 }, { selector: row => row.tasks, name: 'Tasks', cell: (row, index, column, id) => row.tasks.map( (t) => - ) + ), + }, + { + selector: row => { + const result = datasourceCheckResult(row.name); + + if (result == "Pass") { + return ( + + ); + } else if (result == "Fail") { + return ( + + ); + } else { + return ( + + ); 
+ } + }, + name: ( + Validation checks + ), + grow: 0.5 }, ]; + + const check_datasets_script_url = "https://github.com/alan-turing-institute/scivision/releases/download/catalog-checks-report-latest-release/check_datasets.js"; + + const { ready, error } = useScript({ + src: check_datasets_script_url, + onReady: () => setDatasourceChecksReport(window.global_CheckDatasetReport), + onError: () => console.log(`Could not latest dataset checks from ${check_datasets_script_url}`) + }); + return ( (