diff --git a/.github/workflows/check-catalogs.yml b/.github/workflows/check-catalogs.yml new file mode 100644 index 00000000..c2e6e863 --- /dev/null +++ b/.github/workflows/check-catalogs.yml @@ -0,0 +1,35 @@ +name: Check catalogs +on: + schedule: + - cron: '18 3 * * *' + +jobs: + check-catalogs: + runs-on: ubuntu-latest + permissions: + contents: write + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: '3.10' + - name: Upgrade pip + run: python -m pip install --upgrade pip + - name: Install Scivision + run: pip install -e . + - name: Check data catalog + run: python ./dev/check_datasets.py + - name: Store artifact + uses: actions/upload-artifact@v3 + with: + name: check-datasets-report + path: check_datasets.js + - name: Create Release + uses: ncipollo/release-action@v1 + with: + tag: catalog-checks-report-latest-release + name: Catalog checks report + commit: ${{ github.ref }} + allowUpdates: true + removeArtifacts: true + artifacts: check_datasets.js diff --git a/dev/check_datasets.py b/dev/check_datasets.py index 5f56c0c2..2f15aee9 100644 --- a/dev/check_datasets.py +++ b/dev/check_datasets.py @@ -5,10 +5,13 @@ ''' -import pandas as pd +import logging +import json + +from datetime import datetime + from scivision import default_catalog, load_dataset from tqdm import tqdm -import logging # Create Logger logger = logging.getLogger(__name__) @@ -24,7 +27,7 @@ # Load dataset catalog datasources_catalog = default_catalog.datasources.to_dataframe() # Load dataset using load_dataset and record response -rows = [] +rows = {} for index in tqdm(range(datasources_catalog.shape[0])): name = datasources_catalog.loc[index]['name'] print(f'\nValidating: {name}') @@ -39,17 +42,20 @@ check_result = "Fail" response = logger.error(e, exc_info=True) - new_row = { - 'dataset_name': datasources_catalog.loc[index]['name'], + row_data = { 'url': data_url, 'check_result': check_result, 'response': response, } - rows.append(new_row) + 
rows.update({datasources_catalog.loc[index]['name']: row_data}) -automated_checks_report = pd.DataFrame.from_dict(rows, orient='columns') -automated_checks_report.to_csv('check_datasets.csv', index=False) +automated_checks_report = { + "time": datetime.now().isoformat(), + "report": rows +} +automated_checks_report_json = json.dumps(automated_checks_report) -automated_checks_report = automated_checks_report.set_index('dataset_name') -automated_checks_report.to_json('check_datasets.json', orient="index") +with open('check_datasets.js', 'w') as f: + print('// This file was generated automatically by check_datasets.py', file=f) + print(f'var global_CheckDatasetReport = {automated_checks_report_json};', file=f) diff --git a/frontend/package-lock.json b/frontend/package-lock.json index 146720f0..7fb92197 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -27,6 +27,7 @@ "react-router-dom": "^6.3.0", "react-scripts": "^5.0.1", "react-showdown": "^2.3.1", + "react-use-scripts": "^2.0.3", "showdown": "^2.1.0", "styled-components": "^5.3.10", "web-vitals": "^2.1.4" @@ -15158,6 +15159,18 @@ "react-dom": ">=16.6.0" } }, + "node_modules/react-use-scripts": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/react-use-scripts/-/react-use-scripts-2.0.3.tgz", + "integrity": "sha512-LISd5Ec8WtDCo41/0a5g/5o5RhmHjSAnyOe3mO+rZhtn3O+KjZOx3CUSP4dQyEv0JBK4EnB39fRMuwDEXO7uVw==", + "engines": { + "node": ">=8", + "npm": ">=5" + }, + "peerDependencies": { + "react": "^17.0.2 || ^18" + } + }, "node_modules/read-cache": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/read-cache/-/read-cache-1.0.0.tgz", diff --git a/frontend/package.json b/frontend/package.json index ae5eea7f..565a6251 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -23,6 +23,7 @@ "react-router-dom": "^6.3.0", "react-scripts": "^5.0.1", "react-showdown": "^2.3.1", + "react-use-scripts": "^2.0.3", "showdown": "^2.1.0", "styled-components": "^5.3.10", 
"web-vitals": "^2.1.4" diff --git a/frontend/public/index.html b/frontend/public/index.html index 4144544d..9bc86e8f 100644 --- a/frontend/public/index.html +++ b/frontend/public/index.html @@ -23,7 +23,10 @@ Unlike "/favicon.ico" or "favicon.ico", "%PUBLIC_URL%/favicon.ico" will work correctly both with client-side routing and a non-root public URL. Learn how to configure a non-root public URL by running `npm run build`. - --> + --> + + +