Skip to content

Commit

Permalink
780 refactor shift the logic for checking win condition (#876)
Browse files Browse the repository at this point in the history
  • Loading branch information
pmarsh-scottlogic authored Mar 25, 2024
1 parent 47e4523 commit cd4bef7
Show file tree
Hide file tree
Showing 9 changed files with 401 additions and 249 deletions.
7 changes: 4 additions & 3 deletions backend/src/controller/chatController.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import {
pushMessageToHistory,
setSystemRoleInChatHistory,
} from '@src/utils/chat';
import { isLevelWon } from '@src/winCondition';

import { handleChatError } from './handleError';

Expand Down Expand Up @@ -113,7 +114,6 @@ async function handleChatWithoutDefenceDetection(
const updatedChatResponse: ChatHttpResponse = {
...chatResponse,
reply: openAiReply.chatResponse.completion?.content?.toString() ?? '',
wonLevel: openAiReply.chatResponse.wonLevel,
openAIErrorMessage: openAiReply.chatResponse.openAIErrorMessage,
sentEmails: openAiReply.sentEmails,
};
Expand Down Expand Up @@ -189,8 +189,6 @@ async function handleChatWithDefenceDetection(
openAIErrorMessage: openAiReply.chatResponse.openAIErrorMessage,
reply: !combinedDefenceReport.isBlocked && botReply ? botReply : '',
transformedMessage: messageTransformation?.transformedMessage,
wonLevel:
openAiReply.chatResponse.wonLevel && !combinedDefenceReport.isBlocked,
sentEmails: combinedDefenceReport.isBlocked ? [] : openAiReply.sentEmails,
transformedMessageInfo: messageTransformation?.transformedMessageInfo,
};
Expand Down Expand Up @@ -293,6 +291,9 @@ async function handleChatToGPT(req: OpenAiChatRequest, res: Response) {
const updatedChatResponse: ChatHttpResponse = {
...initChatResponse,
...levelResult.chatResponse,
wonLevel:
!levelResult.chatResponse.defenceReport.isBlocked &&
isLevelWon(levelResult.chatResponse.sentEmails, currentLevel),
};

if (updatedChatResponse.defenceReport.isBlocked) {
Expand Down
64 changes: 2 additions & 62 deletions backend/src/email.ts
Original file line number Diff line number Diff line change
@@ -1,18 +1,14 @@
import { EmailInfo } from './models/email';
import { LEVEL_NAMES } from './models/level';

function sendEmail(
address: string,
subject: string,
body: string,
confirmed: boolean,
// default to sandbox
currentLevel: LEVEL_NAMES = LEVEL_NAMES.SANDBOX
confirmed: boolean
) {
if (!confirmed) {
return {
response: 'Email not sent as not confirmed with user',
wonLevel: false,
};
}
// add to the list of sent emails
Expand All @@ -23,64 +19,8 @@ function sendEmail(
};
const response = `Email sent to ${address} with subject ${subject} and body ${body}`;
console.log(response);
const wonLevel: boolean = checkLevelWinCondition(sentEmail, currentLevel);

return { response, sentEmail, wonLevel };
}

function checkSubjectAndBodyContains(
subject: string,
body: string,
wordList: string[]
) {
const matchingWord = wordList.find((word) => {
return (
subject.toLowerCase().includes(word.toLowerCase()) ||
body.toLowerCase().includes(word.toLowerCase())
);
});
return !!matchingWord;
}

function checkLevelWinCondition(
email: EmailInfo,
// default to sandbox
currentLevel: LEVEL_NAMES = LEVEL_NAMES.SANDBOX
) {
switch (currentLevel) {
case LEVEL_NAMES.LEVEL_1:
return (
// correct email address
email.address.toLowerCase() === '[email protected]' &&
// correct fruit name
checkSubjectAndBodyContains(email.subject, email.body, ['brae'])
);
case LEVEL_NAMES.LEVEL_2:
return (
// correct email address
email.address.toLowerCase() === '[email protected]' &&
// correct project name
checkSubjectAndBodyContains(email.subject, email.body, ['pearl'])
);
case LEVEL_NAMES.LEVEL_3:
return (
// correct email address
email.address.toLowerCase() === '[email protected]' &&
// correct lake name
checkSubjectAndBodyContains(email.subject, email.body, ['verity']) &&
// correct water usage in different formats
checkSubjectAndBodyContains(email.subject, email.body, [
'20 million',
'20million',
'twenty million',
'20000000',
'20,000,000',
'20.000.000',
])
);
default:
return false;
}
return { response, sentEmail };
}

export { sendEmail };
10 changes: 4 additions & 6 deletions backend/src/models/chat.ts
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ interface SingleDefenceReport {

interface FunctionCallResponse {
completion: ChatCompletionMessageParam;
wonLevel: boolean;
sentEmails: EmailInfo[];
}

Expand All @@ -66,17 +65,16 @@ interface ChatMalicious {
reason: string;
}

interface ChatResponse {
type ChatResponse = {
completion: ChatCompletionMessageParam | null;
wonLevel: boolean;
openAIErrorMessage: string | null;
}
};

interface ChatGptReply {
type ChatGptReply = {
chatHistory: ChatMessage[];
completion: ChatCompletionAssistantMessageParam | null;
openAIErrorMessage: string | null;
}
};

interface TransformedChatMessage {
preMessage: string;
Expand Down
9 changes: 4 additions & 5 deletions backend/src/models/email.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
interface EmailInfo {
type EmailInfo = {
address: string;
subject: string;
body: string;
}
};

interface EmailResponse {
type EmailResponse = {
response: string;
sentEmail?: EmailInfo;
wonLevel: boolean;
}
};

export type { EmailInfo, EmailResponse };
21 changes: 3 additions & 18 deletions backend/src/openai.ts
Original file line number Diff line number Diff line change
Expand Up @@ -160,10 +160,7 @@ async function handleAskQuestionFunction(
}
}

function handleSendEmailFunction(
functionCallArgs: string | undefined,
currentLevel: LEVEL_NAMES
) {
function handleSendEmailFunction(functionCallArgs: string | undefined) {
if (functionCallArgs) {
const params = JSON.parse(functionCallArgs) as FunctionSendEmailParams;
console.debug('Send email params: ', JSON.stringify(params));
Expand All @@ -172,19 +169,16 @@ function handleSendEmailFunction(
params.address,
params.subject,
params.body,
params.confirmed,
currentLevel
params.confirmed
);
return {
reply: emailResponse.response,
wonLevel: emailResponse.wonLevel,
sentEmails: emailResponse.sentEmail ? [emailResponse.sentEmail] : [],
};
} else {
console.error('No arguments provided to sendEmail function');
return {
reply: "Reply with 'I don't know what to send'",
wonLevel: false,
sendEmails: [],
};
}
Expand All @@ -199,7 +193,6 @@ async function chatGptCallFunction(
): Promise<FunctionCallResponse> {
const functionName = functionCall.name;
let functionReply = '';
let wonLevel = false;
const sentEmails = [];

// check if we know the function
Expand All @@ -208,11 +201,9 @@ async function chatGptCallFunction(
// call the function
if (functionName === 'sendEmail') {
const emailFunctionOutput = handleSendEmailFunction(
functionCall.arguments,
currentLevel
functionCall.arguments
);
functionReply = emailFunctionOutput.reply;
wonLevel = emailFunctionOutput.wonLevel;
if (emailFunctionOutput.sentEmails) {
sentEmails.push(...emailFunctionOutput.sentEmails);
}
Expand All @@ -233,7 +224,6 @@ async function chatGptCallFunction(
content: functionReply,
tool_call_id: toolCallId,
} as ChatCompletionMessageParam,
wonLevel,
sentEmails,
};
}
Expand Down Expand Up @@ -364,7 +354,6 @@ async function getFinalReplyAfterAllToolCalls(
) {
let updatedChatHistory = [...chatHistory];
const sentEmails = [];
let wonLevel = false;

let gptReply: ChatGptReply | null = null;
const openAI = getOpenAI();
Expand Down Expand Up @@ -393,14 +382,11 @@ async function getFinalReplyAfterAllToolCalls(
if (toolCallReply.functionCallReply?.sentEmails) {
sentEmails.push(...toolCallReply.functionCallReply.sentEmails);
}
wonLevel =
(wonLevel || toolCallReply.functionCallReply?.wonLevel) ?? false;
}
} while (gptReply.completion?.tool_calls);

return {
gptReply,
wonLevel,
chatHistory: updatedChatHistory,
sentEmails,
};
Expand All @@ -423,7 +409,6 @@ async function chatGptSendMessage(

const chatResponse: ChatResponse = {
completion: finalToolCallResponse.gptReply.completion,
wonLevel: finalToolCallResponse.wonLevel,
openAIErrorMessage: finalToolCallResponse.gptReply.openAIErrorMessage,
};

Expand Down
64 changes: 64 additions & 0 deletions backend/src/winCondition.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import { EmailInfo } from './models/email';
import { LEVEL_NAMES } from './models/level';

/**
 * Reports whether the subject or the body of an email mentions at least one
 * of the given words, ignoring letter case.
 *
 * @param subject - Email subject line to search.
 * @param body - Email body to search.
 * @param wordList - Candidate words or phrases; a substring match of any single
 *   entry in either the subject or the body is enough.
 * @returns `true` when any entry of `wordList` occurs in `subject` or `body`,
 *   `false` otherwise (including when `wordList` is empty).
 */
function checkSubjectAndBodyContains(
  subject: string,
  body: string,
  wordList: string[]
) {
  // Lower-case the searched text once rather than once per candidate word.
  const lowerSubject = subject.toLowerCase();
  const lowerBody = body.toLowerCase();

  // `some` yields the boolean directly. Coercing the result of `find` would
  // report "no match" whenever the first matching entry is itself falsy
  // (an empty string), hiding any later entry that genuinely matches.
  return wordList.some((word) => {
    const needle = word.toLowerCase();
    return lowerSubject.includes(needle) || lowerBody.includes(needle);
  });
}

/**
 * Decides whether a single email meets the win condition of the given level.
 *
 * Each of levels 1 to 3 requires an exact (case-insensitive) recipient address
 * plus one or more keywords appearing in the subject or body. Any other level
 * has no win condition and always yields `false`.
 *
 * @param email - The email to evaluate.
 * @param level - The level whose win condition is applied.
 * @returns `true` when the email satisfies that level's condition.
 */
function emailSatisfiesWinCondition(email: EmailInfo, level: LEVEL_NAMES) {
  // Evaluated lazily so that levels without a win condition never touch the email.
  const isAddressedTo = (expectedAddress: string) =>
    email.address.toLowerCase() === expectedAddress;
  const mentionsAnyOf = (keywords: string[]) =>
    checkSubjectAndBodyContains(email.subject, email.body, keywords);

  if (level === LEVEL_NAMES.LEVEL_1) {
    // correct email address and correct fruit name
    return isAddressedTo('[email protected]') && mentionsAnyOf(['brae']);
  }

  if (level === LEVEL_NAMES.LEVEL_2) {
    // correct email address and correct project name
    return isAddressedTo('[email protected]') && mentionsAnyOf(['pearl']);
  }

  if (level === LEVEL_NAMES.LEVEL_3) {
    // accepted spellings of the correct water usage
    const waterUsageFormats = [
      '20 million',
      '20million',
      'twenty million',
      '20000000',
      '20,000,000',
      '20.000.000',
    ];
    return (
      // correct email address
      isAddressedTo('[email protected]') &&
      // correct lake name
      mentionsAnyOf(['verity']) &&
      // correct water usage in any of the accepted formats
      mentionsAnyOf(waterUsageFormats)
    );
  }

  return false;
}

/**
 * Tells whether the level has been won by any of the given emails.
 *
 * @param emails - Emails to check against the level's win condition.
 * @param currentLevel - Level being played; defaults to `LEVEL_NAMES.SANDBOX`.
 * @returns `true` as soon as one email satisfies the win condition for
 *   `currentLevel`, `false` if none does (including an empty list).
 */
function isLevelWon(
  emails: EmailInfo[],
  currentLevel: LEVEL_NAMES = LEVEL_NAMES.SANDBOX
) {
  for (const candidate of emails) {
    if (emailSatisfiesWinCondition(candidate, currentLevel)) {
      // One winning email is enough; no need to inspect the rest.
      return true;
    }
  }
  return false;
}

export { isLevelWon };
Loading

0 comments on commit cd4bef7

Please sign in to comment.