From fe979fa77d3c02801c860af604c9a20830516122 Mon Sep 17 00:00:00 2001 From: Philippe Martin Date: Mon, 5 Feb 2024 09:47:44 +0100 Subject: [PATCH] catch error and send telemetry at higher level --- .../src/managers/applicationManager.spec.ts | 6 +- .../src/managers/applicationManager.ts | 86 ++++++++++--------- 2 files changed, 47 insertions(+), 45 deletions(-) diff --git a/packages/backend/src/managers/applicationManager.spec.ts b/packages/backend/src/managers/applicationManager.spec.ts index c614e3f5d..02d0a6e03 100644 --- a/packages/backend/src/managers/applicationManager.spec.ts +++ b/packages/backend/src/managers/applicationManager.spec.ts @@ -604,15 +604,14 @@ describe('buildImages', () => { {} as unknown as ModelsManager, telemetryLogger, ); - test('setTaskState should be called with error and telemetry seent if context does not exist', async () => { + test('setTaskState should be called with error if context does not exist', async () => { vi.spyOn(fs, 'existsSync').mockReturnValue(false); mocks.listImagesMock.mockRejectedValue([]); await expect(manager.buildImages(containers, 'config', taskUtils)).rejects.toThrow( 'Context configured does not exist.', ); - expect(mocks.logErrorMock).toHaveBeenCalled(); }); - test('setTaskState should be called with error and telemetry sent if buildImage executon fails', async () => { + test('setTaskState should be called with error if buildImage executon fails', async () => { vi.spyOn(fs, 'existsSync').mockReturnValue(true); mocks.builImageMock.mockRejectedValue('error'); mocks.listImagesMock.mockRejectedValue([]); @@ -620,7 +619,6 @@ describe('buildImages', () => { 'Something went wrong while building the image: error', ); expect(setTaskStateMock).toBeCalledWith('container1', 'error'); - expect(mocks.logErrorMock).toHaveBeenCalled(); }); test('setTaskState should be called with error if unable to find the image after built', async () => { vi.spyOn(fs, 'existsSync').mockReturnValue(true); diff --git a/packages/backend/src/managers/applicationManager.ts b/packages/backend/src/managers/applicationManager.ts index 943b4f307..cab94e52c 100644 --- a/packages/backend/src/managers/applicationManager.ts +++ b/packages/backend/src/managers/applicationManager.ts @@ -71,39 +71,51 @@ export class ApplicationManager { async pullApplication(recipe: Recipe, model: ModelInfo) { const startTime = performance.now(); - // Create a TaskUtils object to help us - const taskUtil = new RecipeStatusUtils(recipe.id, this.recipeStatusRegistry); - - const localFolder = path.join(this.appUserDirectory, recipe.id); - - // clone the recipe repository on the local folder - const gitCloneInfo: GitCloneInfo = { - repository: recipe.repository, - ref: recipe.ref, - targetDirectory: localFolder, - }; - await this.doCheckout(gitCloneInfo, taskUtil); - - // load and parse the recipe configuration file and filter containers based on architecture, gpu accelerator - // and backend (that define which model supports) - const configAndFilteredContainers = this.getConfigAndFilterContainers(recipe.config, localFolder, taskUtil); - - // get model by downloading it or retrieving locally - const modelPath = await this.modelsManager.downloadModel(model, taskUtil); - - // build all images, one per container (for a basic sample we should have 2 containers = sample app + model service) - const images = await this.buildImages( - configAndFilteredContainers.containers, - configAndFilteredContainers.aiConfigFile.path, - taskUtil, - ); - - // create a pod containing all the containers to run the application - const podInfo = await this.createApplicationPod(images, modelPath, taskUtil); - - await this.runApplication(podInfo, taskUtil); - const durationSeconds = getDurationSecondsSince(startTime); - this.telemetry.logUsage('recipe.pull', { 'recipe.id': recipe.id, 'recipe.name': recipe.name, durationSeconds }); + try { + // Create a TaskUtils object to help us + const taskUtil = new RecipeStatusUtils(recipe.id, this.recipeStatusRegistry); + + const localFolder = path.join(this.appUserDirectory, recipe.id); + + // clone the recipe repository on the local folder + const gitCloneInfo: GitCloneInfo = { + repository: recipe.repository, + ref: recipe.ref, + targetDirectory: localFolder, + }; + await this.doCheckout(gitCloneInfo, taskUtil); + + // load and parse the recipe configuration file and filter containers based on architecture, gpu accelerator + // and backend (that define which model supports) + const configAndFilteredContainers = this.getConfigAndFilterContainers(recipe.config, localFolder, taskUtil); + + // get model by downloading it or retrieving locally + const modelPath = await this.modelsManager.downloadModel(model, taskUtil); + + // build all images, one per container (for a basic sample we should have 2 containers = sample app + model service) + const images = await this.buildImages( + configAndFilteredContainers.containers, + configAndFilteredContainers.aiConfigFile.path, + taskUtil, + ); + + // create a pod containing all the containers to run the application + const podInfo = await this.createApplicationPod(images, modelPath, taskUtil); + + await this.runApplication(podInfo, taskUtil); + const durationSeconds = getDurationSecondsSince(startTime); + this.telemetry.logUsage('recipe.pull', { 'recipe.id': recipe.id, 'recipe.name': recipe.name, durationSeconds }); + } catch (err: unknown) { + const durationSeconds = getDurationSecondsSince(startTime); + this.telemetry.logError('recipe.pull', { + 'recipe.id': recipe.id, + 'recipe.name': recipe.name, + durationSeconds, + message: 'error pulling application', + error: err, + }); + throw err; + } } async runApplication(podInfo: PodInfo, taskUtil: RecipeStatusUtils) { @@ -161,7 +173,6 @@ export class ApplicationManager { await timeout(5000); await this.restartContainerWhenModelServiceIsUp(engineId, modelServiceEndpoint, container).catch( (error: unknown) => { - this.telemetry.logError('recipe.pull', { message: 'error monitoring endpoint', error: error }); console.error('Error monitoring endpoint', error); }, ); @@ -179,7 +190,6 @@ export class ApplicationManager { state: 'error', name: 'Creating application', }); - this.telemetry.logError('recipe.pull', { message: 'error creating pod', error: e }); throw e; } @@ -199,7 +209,6 @@ export class ApplicationManager { state: 'error', name: 'Creating application', }); - this.telemetry.logError('recipe.pull', { message: 'error adding containers to pod', error: e }); throw e; } @@ -348,7 +357,6 @@ export class ApplicationManager { if (!fs.existsSync(context)) { console.error('The context provided does not exist.'); taskUtil.setTaskState(container.name, 'error'); - this.telemetry.logError('recipe.pull', { message: 'configured context does not exist' }); throw new Error('Context configured does not exist.'); } @@ -364,7 +372,6 @@ export class ApplicationManager { // todo: do something with the event if (event === 'error' || (event === 'finish' && data !== '')) { console.error('Something went wrong while building the image: ', data); - this.telemetry.logError('recipe.pull', { message: 'error building image' }); taskUtil.setTaskState(container.name, 'error'); } }, @@ -372,7 +379,6 @@ export class ApplicationManager { ) .catch((err: unknown) => { console.error('Something went wrong while building the image: ', err); - this.telemetry.logError('recipe.pull', { message: 'error building image', error: err }); taskUtil.setTaskState(container.name, 'error'); throw new Error(`Something went wrong while building the image: ${String(err)}`); }); @@ -431,7 +437,6 @@ export class ApplicationManager { } catch (e) { loadingConfiguration.state = 'error'; taskUtil.setTask(loadingConfiguration); - this.telemetry.logError('recipe.pull', { message: 'error loading configuration', error: e }); throw e; } @@ -445,7 +450,6 @@ export class ApplicationManager { // Mark as failure. loadingConfiguration.state = 'error'; taskUtil.setTask(loadingConfiguration); - this.telemetry.logError('recipe.pull', { message: 'no container available' }); throw new Error('No containers available.'); }