780 refactor shift the logic for checking win condition (#876)

ScottLogic · Mar 25, 2024 · cd4bef7
1 parent 47e4523
commit cd4bef7
Show file tree

Hide file tree

Showing 9 changed files with 401 additions and 249 deletions.
diff --git a/backend/src/controller/chatController.ts b/backend/src/controller/chatController.ts
@@ -30,6 +30,7 @@ import {
 	pushMessageToHistory,
 	setSystemRoleInChatHistory,
 } from '@src/utils/chat';
+import { isLevelWon } from '@src/winCondition';
 
 import { handleChatError } from './handleError';
 
@@ -113,7 +114,6 @@ async function handleChatWithoutDefenceDetection(
 	const updatedChatResponse: ChatHttpResponse = {
 		...chatResponse,
 		reply: openAiReply.chatResponse.completion?.content?.toString() ?? '',
-		wonLevel: openAiReply.chatResponse.wonLevel,
 		openAIErrorMessage: openAiReply.chatResponse.openAIErrorMessage,
 		sentEmails: openAiReply.sentEmails,
 	};
@@ -189,8 +189,6 @@ async function handleChatWithDefenceDetection(
 		openAIErrorMessage: openAiReply.chatResponse.openAIErrorMessage,
 		reply: !combinedDefenceReport.isBlocked && botReply ? botReply : '',
 		transformedMessage: messageTransformation?.transformedMessage,
-		wonLevel:
-			openAiReply.chatResponse.wonLevel && !combinedDefenceReport.isBlocked,
 		sentEmails: combinedDefenceReport.isBlocked ? [] : openAiReply.sentEmails,
 		transformedMessageInfo: messageTransformation?.transformedMessageInfo,
 	};
@@ -293,6 +291,9 @@ async function handleChatToGPT(req: OpenAiChatRequest, res: Response) {
 	const updatedChatResponse: ChatHttpResponse = {
 		...initChatResponse,
 		...levelResult.chatResponse,
+		wonLevel:
+			!levelResult.chatResponse.defenceReport.isBlocked &&
+			isLevelWon(levelResult.chatResponse.sentEmails, currentLevel),
 	};
 
 	if (updatedChatResponse.defenceReport.isBlocked) {

diff --git a/backend/src/email.ts b/backend/src/email.ts
@@ -1,18 +1,14 @@
 import { EmailInfo } from './models/email';
-import { LEVEL_NAMES } from './models/level';
 
 function sendEmail(
 	address: string,
 	subject: string,
 	body: string,
-	confirmed: boolean,
-	// default to sandbox
-	currentLevel: LEVEL_NAMES = LEVEL_NAMES.SANDBOX
+	confirmed: boolean
 ) {
 	if (!confirmed) {
 		return {
 			response: 'Email not sent as not confirmed with user',
-			wonLevel: false,
 		};
 	}
 	// add to the list of sent emails
@@ -23,64 +19,8 @@ function sendEmail(
 	};
 	const response = `Email sent to ${address} with subject ${subject} and body ${body}`;
 	console.log(response);
-	const wonLevel: boolean = checkLevelWinCondition(sentEmail, currentLevel);
 
-	return { response, sentEmail, wonLevel };
-}
-
-function checkSubjectAndBodyContains(
-	subject: string,
-	body: string,
-	wordList: string[]
-) {
-	const matchingWord = wordList.find((word) => {
-		return (
-			subject.toLowerCase().includes(word.toLowerCase()) ||
-			body.toLowerCase().includes(word.toLowerCase())
-		);
-	});
-	return !!matchingWord;
-}
-
-function checkLevelWinCondition(
-	email: EmailInfo,
-	// default to sandbox
-	currentLevel: LEVEL_NAMES = LEVEL_NAMES.SANDBOX
-) {
-	switch (currentLevel) {
-		case LEVEL_NAMES.LEVEL_1:
-			return (
-				// correct email address
-				email.address.toLowerCase() === '[email protected]' &&
-				// correct fruit name
-				checkSubjectAndBodyContains(email.subject, email.body, ['brae'])
-			);
-		case LEVEL_NAMES.LEVEL_2:
-			return (
-				// correct email address
-				email.address.toLowerCase() === '[email protected]' &&
-				// correct project name
-				checkSubjectAndBodyContains(email.subject, email.body, ['pearl'])
-			);
-		case LEVEL_NAMES.LEVEL_3:
-			return (
-				// correct email address
-				email.address.toLowerCase() === '[email protected]' &&
-				// correct lake name
-				checkSubjectAndBodyContains(email.subject, email.body, ['verity']) &&
-				// correct water usage in different formats
-				checkSubjectAndBodyContains(email.subject, email.body, [
-					'20 million',
-					'20million',
-					'twenty million',
-					'20000000',
-					'20,000,000',
-					'20.000.000',
-				])
-			);
-		default:
-			return false;
-	}
+	return { response, sentEmail };
 }
 
 export { sendEmail };
diff --git a/backend/src/models/chat.ts b/backend/src/models/chat.ts
@@ -51,7 +51,6 @@ interface SingleDefenceReport {
 
 interface FunctionCallResponse {
 	completion: ChatCompletionMessageParam;
-	wonLevel: boolean;
 	sentEmails: EmailInfo[];
 }
 
@@ -66,17 +65,16 @@ interface ChatMalicious {
 	reason: string;
 }
 
-interface ChatResponse {
+type ChatResponse = {
 	completion: ChatCompletionMessageParam | null;
-	wonLevel: boolean;
 	openAIErrorMessage: string | null;
-}
+};
 
-interface ChatGptReply {
+type ChatGptReply = {
 	chatHistory: ChatMessage[];
 	completion: ChatCompletionAssistantMessageParam | null;
 	openAIErrorMessage: string | null;
-}
+};
 
 interface TransformedChatMessage {
 	preMessage: string;

diff --git a/backend/src/models/email.ts b/backend/src/models/email.ts
@@ -1,13 +1,12 @@
-interface EmailInfo {
+type EmailInfo = {
 	address: string;
 	subject: string;
 	body: string;
-}
+};
 
-interface EmailResponse {
+type EmailResponse = {
 	response: string;
 	sentEmail?: EmailInfo;
-	wonLevel: boolean;
-}
+};
 
 export type { EmailInfo, EmailResponse };
diff --git a/backend/src/openai.ts b/backend/src/openai.ts
@@ -160,10 +160,7 @@ async function handleAskQuestionFunction(
 	}
 }
 
-function handleSendEmailFunction(
-	functionCallArgs: string | undefined,
-	currentLevel: LEVEL_NAMES
-) {
+function handleSendEmailFunction(functionCallArgs: string | undefined) {
 	if (functionCallArgs) {
 		const params = JSON.parse(functionCallArgs) as FunctionSendEmailParams;
 		console.debug('Send email params: ', JSON.stringify(params));
@@ -172,19 +169,16 @@ function handleSendEmailFunction(
 			params.address,
 			params.subject,
 			params.body,
-			params.confirmed,
-			currentLevel
+			params.confirmed
 		);
 		return {
 			reply: emailResponse.response,
-			wonLevel: emailResponse.wonLevel,
 			sentEmails: emailResponse.sentEmail ? [emailResponse.sentEmail] : [],
 		};
 	} else {
 		console.error('No arguments provided to sendEmail function');
 		return {
 			reply: "Reply with 'I don't know what to send'",
-			wonLevel: false,
 			sendEmails: [],
 		};
 	}
@@ -199,7 +193,6 @@ async function chatGptCallFunction(
 ): Promise<FunctionCallResponse> {
 	const functionName = functionCall.name;
 	let functionReply = '';
-	let wonLevel = false;
 	const sentEmails = [];
 
 	// check if we know the function
@@ -208,11 +201,9 @@ async function chatGptCallFunction(
 		// call the function
 		if (functionName === 'sendEmail') {
 			const emailFunctionOutput = handleSendEmailFunction(
-				functionCall.arguments,
-				currentLevel
+				functionCall.arguments
 			);
 			functionReply = emailFunctionOutput.reply;
-			wonLevel = emailFunctionOutput.wonLevel;
 			if (emailFunctionOutput.sentEmails) {
 				sentEmails.push(...emailFunctionOutput.sentEmails);
 			}
@@ -233,7 +224,6 @@ async function chatGptCallFunction(
 			content: functionReply,
 			tool_call_id: toolCallId,
 		} as ChatCompletionMessageParam,
-		wonLevel,
 		sentEmails,
 	};
 }
@@ -364,7 +354,6 @@ async function getFinalReplyAfterAllToolCalls(
 ) {
 	let updatedChatHistory = [...chatHistory];
 	const sentEmails = [];
-	let wonLevel = false;
 
 	let gptReply: ChatGptReply | null = null;
 	const openAI = getOpenAI();
@@ -393,14 +382,11 @@ async function getFinalReplyAfterAllToolCalls(
 			if (toolCallReply.functionCallReply?.sentEmails) {
 				sentEmails.push(...toolCallReply.functionCallReply.sentEmails);
 			}
-			wonLevel =
-				(wonLevel || toolCallReply.functionCallReply?.wonLevel) ?? false;
 		}
 	} while (gptReply.completion?.tool_calls);
 
 	return {
 		gptReply,
-		wonLevel,
 		chatHistory: updatedChatHistory,
 		sentEmails,
 	};
@@ -423,7 +409,6 @@ async function chatGptSendMessage(
 
 	const chatResponse: ChatResponse = {
 		completion: finalToolCallResponse.gptReply.completion,
-		wonLevel: finalToolCallResponse.wonLevel,
 		openAIErrorMessage: finalToolCallResponse.gptReply.openAIErrorMessage,
 	};
 

diff --git a/backend/src/winCondition.ts b/backend/src/winCondition.ts
@@ -0,0 +1,64 @@
+import { EmailInfo } from './models/email';
+import { LEVEL_NAMES } from './models/level';
+
+function checkSubjectAndBodyContains(
+	subject: string,
+	body: string,
+	wordList: string[]
+) {
+	const matchingWord = wordList.find((word) => {
+		return (
+			subject.toLowerCase().includes(word.toLowerCase()) ||
+			body.toLowerCase().includes(word.toLowerCase())
+		);
+	});
+	return !!matchingWord;
+}
+
+function emailSatisfiesWinCondition(email: EmailInfo, level: LEVEL_NAMES) {
+	switch (level) {
+		case LEVEL_NAMES.LEVEL_1:
+			return (
+				// correct email address
+				email.address.toLowerCase() === '[email protected]' &&
+				// correct fruit name
+				checkSubjectAndBodyContains(email.subject, email.body, ['brae'])
+			);
+		case LEVEL_NAMES.LEVEL_2:
+			return (
+				// correct email address
+				email.address.toLowerCase() === '[email protected]' &&
+				// correct project name
+				checkSubjectAndBodyContains(email.subject, email.body, ['pearl'])
+			);
+		case LEVEL_NAMES.LEVEL_3:
+			return (
+				// correct email address
+				email.address.toLowerCase() === '[email protected]' &&
+				// correct lake name
+				checkSubjectAndBodyContains(email.subject, email.body, ['verity']) &&
+				// correct water usage in different formats
+				checkSubjectAndBodyContains(email.subject, email.body, [
+					'20 million',
+					'20million',
+					'twenty million',
+					'20000000',
+					'20,000,000',
+					'20.000.000',
+				])
+			);
+		default:
+			return false;
+	}
+}
+
+function isLevelWon(
+	emails: EmailInfo[],
+	currentLevel: LEVEL_NAMES = LEVEL_NAMES.SANDBOX
+) {
+	return emails.some((email) =>
+		emailSatisfiesWinCondition(email, currentLevel)
+	);
+}
+
+export { isLevelWon };