Skip to content

Commit

Permalink
Automate datasource catalog check and show in frontend (#594)
Browse files Browse the repository at this point in the history
 - modify the check script to emit JavaScript that can be loaded by the frontend (working around GitHub CORS)
 - run the check on a schedule and store the output as a GitHub release artifact
 - load and show the result in the frontend
  • Loading branch information
ots22 authored Aug 9, 2023
1 parent 19b2c41 commit ca18ec2
Show file tree
Hide file tree
Showing 7 changed files with 144 additions and 14 deletions.
35 changes: 35 additions & 0 deletions .github/workflows/check-catalogs.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Scheduled check of the datasource catalog.
# Runs dev/check_datasets.py and publishes the generated report
# (check_datasets.js) both as a workflow artifact and as a release asset,
# so the frontend can load it from a stable release URL.
name: Check catalogs
on:
  schedule:
    # Every day at 03:18 (GitHub evaluates cron schedules in UTC)
    - cron: '18 3 * * *'

jobs:
  check-catalogs:
    runs-on: ubuntu-latest
    permissions:
      # Write access is needed to create/update the release below
      contents: write
    steps:
      - uses: actions/checkout@v3
      - uses: actions/setup-python@v4
        with:
          python-version: '3.10'
      - name: Upgrade pip
        run: python -m pip install --upgrade pip
      - name: Install Scivision
        run: pip install -e .
      - name: Check data catalog
        run: python ./dev/check_datasets.py
      - name: Store artifact
        uses: actions/upload-artifact@v3
        with:
          name: check-datasets-report
          # The check script writes check_datasets.js (it no longer writes a CSV)
          path: check_datasets.js
      - name: Create Release
        uses: ncipollo/release-action@v1
        with:
          # Fixed tag, updated in place, so the asset download URL stays stable
          tag: catalog-checks-report-latest-release
          name: Catalog checks report
          commit: ${{ github.ref }}
          allowUpdates: true
          removeArtifacts: true
          artifacts: check_datasets.js
26 changes: 16 additions & 10 deletions dev/check_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,13 @@
'''

import pandas as pd
import logging
import json

from datetime import datetime

from scivision import default_catalog, load_dataset
from tqdm import tqdm
import logging

# Create Logger
logger = logging.getLogger(__name__)
Expand All @@ -24,7 +27,7 @@
# Load dataset catalog
datasources_catalog = default_catalog.datasources.to_dataframe()
# Load dataset using load_dataset and record response
rows = []
rows = {}
for index in tqdm(range(datasources_catalog.shape[0])):
name = datasources_catalog.loc[index]['name']
print(f'\nValidating: {name}')
Expand All @@ -39,17 +42,20 @@
check_result = "Fail"
response = logger.error(e, exc_info=True)

new_row = {
'dataset_name': datasources_catalog.loc[index]['name'],
row_data = {
'url': data_url,
'check_result': check_result,
'response': response,
}

rows.append(new_row)
rows.update({datasources_catalog.loc[index]['name']: row_data})

automated_checks_report = pd.DataFrame.from_dict(rows, orient='columns')
automated_checks_report.to_csv('check_datasets.csv', index=False)
automated_checks_report = {
"time": datetime.now().isoformat(),
"report": rows
}
automated_checks_report_json = json.dumps(automated_checks_report)

automated_checks_report = automated_checks_report.set_index('dataset_name')
automated_checks_report.to_json('check_datasets.json', orient="index")
with open('check_datasets.js', 'w') as f:
print('// This file was generated automatically by check_datasets.py', file=f)
print(f'var global_CheckDatasetReport = {automated_checks_report_json};', file=f)
13 changes: 13 additions & 0 deletions frontend/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions frontend/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
"react-router-dom": "^6.3.0",
"react-scripts": "^5.0.1",
"react-showdown": "^2.3.1",
"react-use-scripts": "^2.0.3",
"showdown": "^2.1.0",
"styled-components": "^5.3.10",
"web-vitals": "^2.1.4"
Expand Down
5 changes: 4 additions & 1 deletion frontend/public/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,10 @@
Unlike "/favicon.ico" or "favicon.ico", "%PUBLIC_URL%/favicon.ico" will
work correctly both with client-side routing and a non-root public URL.
Learn how to configure a non-root public URL by running `npm run build`.
-->
-->

<!-- <script src="https://github.com/alan-turing-institute/scivision/releases/download/catalog-checks-report-latest-release/check_datasets.js" /> -->

<title>Scivision</title>
</head>
<body>
Expand Down
4 changes: 4 additions & 0 deletions frontend/src/App.css
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,7 @@ div.gridtext {
background-color: Lavender;
}

/* Dotted underline for elements with the "tooltip-available" class
   (used on the "Validation checks" column header, which carries a
   title tooltip). */
.tooltip-available {
text-decoration-line: underline;
text-decoration-style: dotted;
}
74 changes: 71 additions & 3 deletions frontend/src/table.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import { React } from 'react';
import { React, useState, useEffect } from 'react';

import useScript from "react-use-scripts";

import datasources from './data/datasources.json';
import models from './data/models.json';
Expand Down Expand Up @@ -92,6 +94,36 @@ export function ModelTable() {
// Component: Datasources, table view
// route: /datasources
export function DatasourceTable() {

const [datasourceChecksReport, setDatasourceChecksReport] =
useState(null);

/**
 * Look up the automated check outcome for one datasource.
 *
 * Reads the `datasourceChecksReport` state of the enclosing component.
 *
 * @param {string} name - Datasource name, used as the key into the report.
 * @returns {string} The entry's `check_result`, or "Unknown" when the report
 *   is not loaded, has no entry for `name`, or the entry carries no result.
 */
function datasourceCheckResult(name) {
  // The report may be null (initial state) or undefined (the script loaded
  // but did not define the expected global), so guard against both.
  const entries = datasourceChecksReport?.report;
  if (entries == null) {
    return "Unknown";
  }

  // Only own keys count as datasources; inherited ones such as
  // "constructor" must not be mistaken for report entries.
  if (!Object.prototype.hasOwnProperty.call(entries, name)) {
    return "Unknown";
  }

  return entries[name]?.check_result ?? "Unknown";
}

/**
 * Format the time recorded in the datasource checks report.
 *
 * @returns {string} The report's `time` rendered with `toUTCString()`, or
 *   "(never)" while no report is available.
 */
function datasourceCheckTime() {
  if (!datasourceChecksReport) {
    return "(never)";
  }

  // NOTE(review): an ISO date-time string without a UTC offset is parsed as
  // local time by Date - confirm the report's `time` includes an offset.
  const checkedAt = new Date(datasourceChecksReport.time);
  return checkedAt.toUTCString();
}

/**
 * Build the tooltip text stating when the validation checks last ran.
 *
 * @returns {string} "last run " followed by the result of
 *   `datasourceCheckTime()`.
 */
function datasourceValidationTimeString() {
  const lastRun = datasourceCheckTime();
  return `last run ${lastRun}`;
}

const columns = [
{
name: 'Thumbnail',
Expand All @@ -107,17 +139,53 @@ export function DatasourceTable() {
selector: row => row.name,
name: 'Name',
sortable: true,
grow: 0.3
grow: 0.5
},
{
selector: row => row.tasks,
name: 'Tasks',
cell: (row, index, column, id) => row.tasks.map(
(t) => <TaskBadge key={t} taskName={t} />
)
),
},
{
selector: row => {
const result = datasourceCheckResult(row.name);

if (result == "Pass") {
return (
<img src="https://img.shields.io/badge/scivision_metadata-pass-green"
title="The metadata for this datasource was successfully loaded by scivision, from the location in the catalog" />
);
} else if (result == "Fail") {
return (
<img src="https://img.shields.io/badge/scivision_metadata-fail-red"
title="Scivision metadata (yaml) file for this datasource failed to load or was missing at the indicated location" />
);
} else {
return (
<img src="https://img.shields.io/badge/scivision_metadata-unknown-lightgray"
title="Could not access the result for this validation check" />
);
}
},
name: (<span className="tooltip-available"
title={datasourceValidationTimeString()}>
Validation checks
</span>),
grow: 0.5
},
];


// Location of the generated report script, published as a GitHub release
// asset. It is loaded as a script and read back through the global variable
// it defines (global_CheckDatasetReport).
const check_datasets_script_url = "https://github.com/alan-turing-institute/scivision/releases/download/catalog-checks-report-latest-release/check_datasets.js";

const { ready, error } = useScript({
  src: check_datasets_script_url,
  // If the script ran without defining the global, store null so the state
  // stays in its "no report" form rather than becoming undefined.
  onReady: () => setDatasourceChecksReport(window.global_CheckDatasetReport ?? null),
  onError: () => console.log(`Could not load latest dataset checks from ${check_datasets_script_url}`)
});

return (
<DataTable columns={columns} data={datasources.entries} title=""
expandableRowsComponent={(props) => (
Expand Down

0 comments on commit ca18ec2

Please sign in to comment.