From f99e8641833e5aa4303fd0c133dbc93f7d0e3a87 Mon Sep 17 00:00:00 2001 From: yianzhang14 <78504318+yianzhang14@users.noreply.github.com> Date: Mon, 9 Dec 2024 11:20:01 -0600 Subject: [PATCH] Refresh fix (#134) * refactored statistic * refactoring + documentation * finished refactoring on maintainers * finished connector refactoring * finished eslinting all remaining ts files * documented server.ts + added markdown files * refactored cli.ts * fixed some typing issues * revamped packages + redid express structure + more linting * more type hinting + async removal on models * linting on main src/ files * changes to type assertions * finished maintainers * make types more meaningful * finished with util rework + minor changes elsewhere * finished linting + helper rework + better nullguarding + max line size * fixed helper typo & slurmconnector doc * add specialized manifest download functionality + lint action * documentation initial commit + helper moving + abstract functions + initial lazy supervisor * finished codebase overview doc * finished codebase documentation doc * added caching to folder uploaders + a bit more linting * disable cached upload on data * added cache refresing functionality * fixed swagger error * fixed cli * readd typedoc * more package updates * added mysql * disable uml * move all documentation out of build * fixed generate swagger bug * reenable documentation * fixed gitutil bug + updated packages * recursive mkdirs * more gitutil bugfixes * silenced console.logs * reenabled urlencoded * fixed folder uploader bug * more bugfixing + removing console logs * fixed remotefsexists bug * job upload now works * update build script to copy example jsons * fixed github actions * added ruff for the python files for now * add js globus sdk * added typing to cachedupload * switched to git sdk * switch to fs promises * automated branch resolution + out of date checking * moved configs to example * undo config.json move * fixed action typo * removed all 
refreshing code + added automatic logic to decide when to re-upload git repo * added yaml parser * add js globus sdk * sync package lock * rework folder typing to be more enumy * leftover changes from folder rework * added cache model * force synchronize * synchronize in the json * actually force reinitialize * turned strict on + updated packages + migrated to typeorm 0.3 + migrated to redis 5 + cleaned up redis classes * reworked tsconfig * remove typeorm-uml * actually disable uml + add cache to entities * actually disable uml instead of tsdoc * fixed redis * connect to redis client * reworked cache + deprecated specialized manifest + added more git caching * reworked refreshing in getexecutablemanifest * reworked db to not be a class per typeorm guidelines * leftover db port changes * remove ormconfig * initialize database in server.ts * fixed some minor errors * initialize datasource in init hello world * reenable register * move this.register * fix db registering * fix db registering * add print statements * debugging job2object * updated dockerfile + removed print statements * fixed typing on nullable model columns * better undefined/null handling + typeorm function fixes * added more is_testing clauses to console logs * fixed registercache * more fixes to hpcpath -> cachepath * disable removing comments * styling change + git manifest cache + new git routes * rework server structure * initialize cache * change rimraf * changed promises to awaits * change unix time scaling * fix linting in cli * undo examples folder * rename a file * fixed merge bugs * fixed linting * change the eslint config * revert back to commonjs * change global variables * disable typeorm redis caching * update packages * changed file structure + updated linting * update file paths * update file paths again * fix paths and eslint * more path fixes * added axios-based globus ts impl * fix globus imports * fix entities * more typeorm fixes * fix typeorm again * change a path * add logging 
* add submission id retrieval * fix caching invalidation * add sanity check for verifying cache refresh --- src/utils/FolderUploader.ts | 88 +++++++++++++++++++++++++++---------- 1 file changed, 66 insertions(+), 22 deletions(-) diff --git a/src/utils/FolderUploader.ts b/src/utils/FolderUploader.ts index 5dd11903..a5249030 100644 --- a/src/utils/FolderUploader.ts +++ b/src/utils/FolderUploader.ts @@ -228,10 +228,21 @@ abstract class CachedFolderUploader extends BaseFolderUploader { } } + /** + * Determines whether a cached directory actually exists on a remote HPC. + * + * @private + * @return {Promise} true if the directory exists, false otherwise + */ private async cacheExists(): Promise { return this.connector.remoteFsExists(this.cachePath); } + /** + * Explicitly removes a remote cached directory, if it exists. + * + * @private + */ private async clearCache() { if (!(await this.cacheExists())) { return; @@ -240,28 +251,60 @@ abstract class CachedFolderUploader extends BaseFolderUploader { await this.connector.rm(this.cachePath); } + /** + * Unzips a cached zip file to the hpc path, where it will be used in jobs. + * + * @private + */ private async pullFromCache() { // assert cached file exists await this.connector.unzip(this.cachePath, this.hpcPath); } + /** + * Abstract function implemented by more concrete folder uploaders. Encompasses the general functionality + * of uploading the files associated with a given job (as encapsulated by a general folder uploader) to + * the internally stored cache path. + * + * @protected + * @abstract + * @param {boolean} _force whether or not to force upload (used for force refresh) + */ protected abstract uploadToCache(): Promise; - public async refreshCache() { - await this.clearCache(); - await this.uploadToCache(); - } + /** + * Abstract function implemented by more concrete folder uploaders. 
Encompasses the general requirement to + * determine when job files were last truly updated externally, to be compared with the stored update times in + * the database to decide whether to refresh. + * + * @protected + * @abstract + * @return {Promise} Last update time in UNIX time (milliseconds) + */ + protected abstract getCanonicalUpdateTime(): Promise; public async cachedUpload() { - if (!(await this.cacheExists())) { - await this.refreshCache(); + const recordedUpdate = await this.getRecordedUpdateTime(); + const canonicalUpdate = await this.getCanonicalUpdateTime(); + + if (recordedUpdate >= 0 && canonicalUpdate >= 0 + && (recordedUpdate / canonicalUpdate > 100 || canonicalUpdate / recordedUpdate > 100)) { + console.error("Comparing seconds and milliseconds for cache refresh check", recordedUpdate, canonicalUpdate); + } + + // upload if it doesn't exist or the cache is stale + if (!(await this.cacheExists()) + || recordedUpdate < canonicalUpdate + ) { + await this.uploadToCache(); + await this.registerCache(); } await this.pullFromCache(); } - protected async getUpdateTime(): Promise { + protected async getRecordedUpdateTime(): Promise { const exists = await dataSource.getRepository(Cache).findOneBy({ hpc: this.hpcName, hpcPath: this.cachePath @@ -392,6 +435,11 @@ class GlobusFolderUploader extends CachedFolderUploader { // eslint-disable-lin // await this.connector.zip(uploadPath, this.cachePath); // await this.connector.rm(uploadPath); } + + // eslint-disable-next-line @typescript-eslint/require-await + protected async getCanonicalUpdateTime(): Promise { + throw new NotImplementedError("Not implemented"); + } } /** @@ -452,6 +500,11 @@ export class LocalFolderUploader extends CachedFolderUploader { // need some way to detect cache invalidation throw new NotImplementedError("Not implemented"); } + + // eslint-disable-next-line @typescript-eslint/require-await + protected async getCanonicalUpdateTime(): Promise { + throw new NotImplementedError("Not 
implemented"); + } } /** @@ -474,28 +527,19 @@ export class GitFolderUploader extends LocalFolderUploader { super({ type: "local", localPath }, hpcName, userId, connector); this.gitId = from.gitId; } - /** - * Specialization of cache upload for uploading a git folder. Has functionality to upload if and only if - * the cached git repository is out of date. - * - * @protected - */ - protected async uploadToCache(): Promise { + + protected async getCanonicalUpdateTime(): Promise { const git = await GitUtil.findGit(this.gitId); if (!git) { throw Error("Could not find git repository to upload."); } - const cacheUpdateTime = await this.getUpdateTime(); - // account for milliseconds - const localUpdateTime = await GitUtil.getLastCommitTime(git) * 1000; - - if (cacheUpdateTime < localUpdateTime) { - await this.uploadToPath(this.cachePath); + return (await GitUtil.getLastCommitTime(git)) * 1000; + } - await this.registerCache(); - } + protected async uploadToCache() { + await this.uploadToPath(this.cachePath); await this.register(); }