diff --git a/package.json b/package.json index 807d2253b..4c0633ffc 100644 --- a/package.json +++ b/package.json @@ -50,6 +50,7 @@ "exponential-backoff": "^3.1.1", "husky": "^8.0.2", "jimp": "^0.22.4", + "js-tiktoken": "^1.0.7", "js-yaml": "^4.1.0", "jsonwebtoken": "^9.0.2", "libsodium-wrappers": "^0.7.11", @@ -63,6 +64,7 @@ "parse5": "^7.1.2", "prettier": "^2.7.1", "probot": "^12.2.4", + "sentencepiece-js": "^1.1.0", "telegraf": "^4.11.2", "tsx": "^3.12.7", "yaml": "^2.2.2" diff --git a/src/bindings/config.ts b/src/bindings/config.ts index 55d0fb8be..9b6d69afe 100644 --- a/src/bindings/config.ts +++ b/src/bindings/config.ts @@ -65,9 +65,7 @@ export const loadConfig = async (context: Context): Promise => { permitBaseUrl: process.env.PERMIT_BASE_URL || permitBaseUrl, }, unassign: { - timeRangeForMaxIssue: process.env.DEFAULT_TIME_RANGE_FOR_MAX_ISSUE - ? Number(process.env.DEFAULT_TIME_RANGE_FOR_MAX_ISSUE) - : timeRangeForMaxIssue, + timeRangeForMaxIssue: process.env.DEFAULT_TIME_RANGE_FOR_MAX_ISSUE ? Number(process.env.DEFAULT_TIME_RANGE_FOR_MAX_ISSUE) : timeRangeForMaxIssue, timeRangeForMaxIssueEnabled: process.env.DEFAULT_TIME_RANGE_FOR_MAX_ISSUE_ENABLED ? process.env.DEFAULT_TIME_RANGE_FOR_MAX_ISSUE_ENABLED == "true" : timeRangeForMaxIssueEnabled, @@ -108,7 +106,7 @@ export const loadConfig = async (context: Context): Promise => { registerWalletWithVerification: registerWalletWithVerification, }, ask: { - apiKey: openAIKey, + apiKey: process.env.OPENAI_API_KEY || openAIKey, tokenLimit: openAITokenLimit || 0, }, accessControl: enableAccessControl, diff --git a/src/configs/ubiquibot-config-default.ts b/src/configs/ubiquibot-config-default.ts index 9598ef782..83a1ec9c7 100644 --- a/src/configs/ubiquibot-config-default.ts +++ b/src/configs/ubiquibot-config-default.ts @@ -10,6 +10,7 @@ export const DefaultConfig: MergedConfig = { disableAnalytics: false, commentIncentives: false, registerWalletWithVerification: false, + openAIKey: process.env.OPENAI_API_KEY, promotionComment: "\n
If you enjoy the DevPool experience, please follow Ubiquity on GitHub and star this repo to show your support. It helps a lot!
", defaultLabels: [], diff --git a/src/declarations/tokenizer.model b/src/declarations/tokenizer.model new file mode 100644 index 000000000..85c0803f3 Binary files /dev/null and b/src/declarations/tokenizer.model differ diff --git a/src/handlers/comment/handlers/ask.ts b/src/handlers/comment/handlers/ask.ts index 63777d4ae..a1206cbc4 100644 --- a/src/handlers/comment/handlers/ask.ts +++ b/src/handlers/comment/handlers/ask.ts @@ -1,9 +1,11 @@ import { getBotContext, getLogger } from "../../../bindings"; import { Payload, StreamlinedComment, UserType } from "../../../types"; import { getAllIssueComments, getAllLinkedIssuesAndPullsInBody } from "../../../helpers"; -import { CreateChatCompletionRequestMessage } from "openai/resources/chat"; -import { askGPT, decideContextGPT, sysMsg } from "../../../helpers/gpt"; +import { ChatCompletionMessageParam } from "openai/resources/chat"; +import { askGPT, gptContextTemplate, sysMsg } from "../../../helpers/gpt"; import { ErrorDiff } from "../../../utils/helpers"; +import fetch from "node-fetch"; +import { SentencePieceProcessor, cleanText } from "sentencepiece-js"; /** * @param body The question to ask @@ -13,7 +15,6 @@ export const ask = async (body: string) => { const logger = getLogger(); const payload = context.payload as Payload; - const sender = payload.sender.login; const issue = payload.issue; if (!body) { @@ -24,20 +25,44 @@ export const ask = async (body: string) => { return `This command can only be used on issues`; } - const chatHistory: CreateChatCompletionRequestMessage[] = []; + let chatHistory: ChatCompletionMessageParam[] = []; const streamlined: StreamlinedComment[] = []; let linkedPRStreamlined: StreamlinedComment[] = []; let linkedIssueStreamlined: StreamlinedComment[] = []; - const regex = /^\/ask\s(.+)$/; + const regex = /^\/ask\s*([\s\S]*)$/; const matches = body.match(regex); if (matches) { const [, body] = matches; - // standard comments + const sp = new SentencePieceProcessor(); + try { + await sp.load(process.cwd() + "/src/declarations/tokenizer.model"); + await sp.loadVocabulary(process.cwd() + "/src/declarations/tokenizer.model"); + } catch (err) { + console.log("===================================="); + console.log("err", err); + console.log("===================================="); + } + + const encodee = (s: string, bos = true) => { + const bosID = sp.encodeIds("")[0]; + const eosID = sp.encodeIds("")[0]; + + if (typeof s !== "string") { + throw new Error("encodee only accepts strings"); + } + let t = sp.encodeIds(s); + + if (bos) { + t = [bosID, ...t]; + } + t = [...t, eosID]; + return t; + }; + const comments = await getAllIssueComments(issue.number); - // raw so we can grab the tag const commentsRaw = await getAllIssueComments(issue.number, "raw"); if (!comments) { @@ -45,13 +70,11 @@ export const ask = async (body: string) => { return ErrorDiff(`Error getting issue comments`); } - // add the first comment of the issue/pull request streamlined.push({ login: issue.user.login, body: issue.body, }); - // add the rest comments.forEach(async (comment, i) => { if (comment.user.type == UserType.User || commentsRaw[i].body.includes("")) { streamlined.push({ @@ -71,49 +94,130 @@ export const ask = async (body: string) => { linkedPRStreamlined = links.linkedPrs; } - // let chatgpt deduce what is the most relevant context - const gptDecidedContext = await decideContextGPT(chatHistory, streamlined, linkedPRStreamlined, linkedIssueStreamlined); - - if (linkedIssueStreamlined.length == 0 && linkedPRStreamlined.length == 0) { - // No external context to add - chatHistory.push( - { - role: "system", - content: sysMsg, - name: "UbiquityAI", - } as CreateChatCompletionRequestMessage, - { - role: "user", - content: body, - name: sender, - } as CreateChatCompletionRequestMessage - ); - } else { - chatHistory.push( - { - role: "system", - content: sysMsg, // provide the answer template - name: "UbiquityAI", - } as CreateChatCompletionRequestMessage, - { - role: "system", - content: "Original Context: " + JSON.stringify(gptDecidedContext), // provide the context - name: "system", - } as CreateChatCompletionRequestMessage, - { - role: "user", - content: "Question: " + JSON.stringify(body), // provide the question - name: "user", - } as CreateChatCompletionRequestMessage - ); + const formatChat = (chat: { role?: string; content?: string; login?: string; body?: string }[]) => { + if (chat.length === 0) return ""; + let chatString = ""; + chat.reduce((acc, message) => { + if (!message) return acc; + const role = acc.role || acc.login; + const content = acc.content || acc.body; + + chatString += `${cleanText(role)}: ${cleanText(content)}\n\n`; + + acc = { + role, + content, + }; + + return acc; + }); + console.log("chatString", chatString); + return chatString; + }; + + chatHistory.push( + { + role: "system", + content: gptContextTemplate, + }, + { + role: "user", + content: `This issue/Pr context: \n ${JSON.stringify(streamlined)}`, + } + ); + + if (linkedIssueStreamlined.length > 0) { + chatHistory.push({ + role: "user", + content: `Linked issue(s) context: \n ${JSON.stringify(linkedIssueStreamlined)}`, + }); + } else if (linkedPRStreamlined.length > 0) { + chatHistory.push({ + role: "user", + content: `Linked Pr(s) context: \n ${JSON.stringify(linkedPRStreamlined)}`, + }); } - const gptResponse = await askGPT(body, chatHistory); + const gptDecidedContext = await askGPT("ContextCall", chatHistory); + + const gptAnswer = typeof gptDecidedContext === "string" ? gptDecidedContext : gptDecidedContext.answer || ""; + const contextTokens = encodee(cleanText(gptAnswer)); + + chatHistory = []; + + const tokenSize = contextTokens.length + encodee(body).length; + + if (tokenSize > 4096) { + return "Your question is too long. Please ask a shorter question."; + } + + chatHistory.push( + { + role: "system", + content: `${sysMsg}`, + }, + { + role: "user", + content: `Context: ${cleanText(gptAnswer)} \n Question: ${body}`, + } + ); + + const chats = chatHistory.map((chat) => { + return { + role: chat.role, + content: chat.content ? cleanText(chat.content) : "", + }; + }); - if (typeof gptResponse === "string") { - return gptResponse; - } else if (gptResponse.answer) { - return gptResponse.answer; + const finalTokens = encodee(formatChat(chats), false); + + const options = { + method: "POST", + headers: { + accept: "application/json", + "content-type": "application/json", + authorization: "Bearer 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", + }, + body: JSON.stringify({ + model: "mistral-7b-instruct", + messages: chatHistory, + }), + }; + + const ans = await fetch("https://api.perplexity.ai/chat/completions", options).then((response) => response.json().catch((err) => console.log(err))); + const answer = { tokens: ans.usage, text: ans.choices[0].message.content }; + const gptRes = await askGPT(body, chatHistory); + + const gptAns = typeof gptRes === "string" ? gptRes : gptRes.answer || ""; + const gptTokens = typeof gptRes === "string" ? [] : gptRes.tokenUsage || []; + + const comment = ` +### Perp Tokens +\`\`\`json +${JSON.stringify(answer.tokens)} +\`\`\` + +### GPT Tokens +\`\`\`json +${JSON.stringify(gptTokens)} +\`\`\ + +### SPP Tokens +\`\`\`json +Note: JSON in responses are throwing this off rn: ${finalTokens.length + contextTokens.length} tokens +\`\`\` + +### Perp Response +${answer.text} + + + +### GPT Response +${gptAns} +`; + + if (answer) { + return comment; } else { return ErrorDiff(`Error getting response from GPT`); } diff --git a/src/helpers/gpt.ts b/src/helpers/gpt.ts index 046503d79..5736cb06c 100644 --- a/src/helpers/gpt.ts +++ b/src/helpers/gpt.ts @@ -1,6 +1,4 @@ -import { getBotConfig, getBotContext, getLogger } from "../bindings"; -import { Payload, StreamlinedComment, UserType } from "../types"; -import { getAllIssueComments, getAllLinkedIssuesAndPullsInBody } from "../helpers"; +import { getBotConfig, getLogger } from "../bindings"; import OpenAI from "openai"; import { CreateChatCompletionRequestMessage } from "openai/resources/chat"; import { ErrorDiff } from "../utils/helpers"; @@ -61,77 +59,7 @@ Example:[ * @param linkedPRStreamlined an array of comments in the form of { login: string, body: string } * @param linkedIssueStreamlined an array of comments in the form of { login: string, body: string } */ -export const decideContextGPT = async ( - chatHistory: CreateChatCompletionRequestMessage[], - streamlined: StreamlinedComment[], - linkedPRStreamlined: StreamlinedComment[], - linkedIssueStreamlined: StreamlinedComment[] -) => { - const context = getBotContext(); - const logger = getLogger(); - - const payload = context.payload as Payload; - const issue = payload.issue; - - if (!issue) { - return `Payload issue is undefined`; - } - - // standard comments - const comments = await getAllIssueComments(issue.number); - // raw so we can grab the tag - const commentsRaw = await getAllIssueComments(issue.number, "raw"); - - if (!comments) { - logger.info(`Error getting issue comments`); - return `Error getting issue comments`; - } - - // add the first comment of the issue/pull request - streamlined.push({ - login: issue.user.login, - body: issue.body, - }); - - // add the rest - comments.forEach(async (comment, i) => { - if (comment.user.type == UserType.User || commentsRaw[i].body.includes("")) { - streamlined.push({ - login: comment.user.login, - body: comment.body, - }); - } - }); - - // returns the conversational context from all linked issues and prs - const links = await getAllLinkedIssuesAndPullsInBody(issue.number); - - if (typeof links === "string") { - logger.info(`Error getting linked issues or prs: ${links}`); - return `Error getting linked issues or prs: ${links}`; - } - - linkedIssueStreamlined = links.linkedIssues; - linkedPRStreamlined = links.linkedPrs; - - chatHistory.push( - { - role: "system", - content: "This issue/Pr context: \n" + JSON.stringify(streamlined), - name: "UbiquityAI", - } as CreateChatCompletionRequestMessage, - { - role: "system", - content: "Linked issue(s) context: \n" + JSON.stringify(linkedIssueStreamlined), - name: "UbiquityAI", - } as CreateChatCompletionRequestMessage, - { - role: "system", - content: "Linked Pr(s) context: \n" + JSON.stringify(linkedPRStreamlined), - name: "UbiquityAI", - } as CreateChatCompletionRequestMessage - ); - +export const decideContextGPT = async (chatHistory: CreateChatCompletionRequestMessage[]) => { // we'll use the first response to determine the context of future calls const res = await askGPT("", chatHistory); diff --git a/src/helpers/issue.ts b/src/helpers/issue.ts index 8cd78b047..4124a877c 100644 --- a/src/helpers/issue.ts +++ b/src/helpers/issue.ts @@ -764,84 +764,58 @@ export const getAllLinkedIssuesAndPullsInBody = async (issueNumber: number) => { const linkedPRStreamlined: StreamlinedComment[] = []; const linkedIssueStreamlined: StreamlinedComment[] = []; - const regex = /https:\/\/github\.com\/[^/\s]+\/[^/\s]+\/(issues|pull)\/(\d+)/gi; + const regex = /(#(\d+)|https:\/\/github\.com\/[^/\s]+\/[^/\s]+\/(issues|pull)\/(\d+))/gi; + const matches = body.match(regex); if (matches) { - try { - const linkedIssues: number[] = []; - const linkedPrs: number[] = []; - - // this finds refs via all patterns: #, full url or [#25](url.to.issue) - const issueRef = issue.body.match(/(#(\d+)|https:\/\/github\.com\/[^/\s]+\/[^/\s]+\/(issues|pull)\/(\d+))/gi); - - // if they exist, strip out the # or the url and push them to their arrays - if (issueRef) { - issueRef.forEach((issue) => { - if (issue.includes("#")) { - linkedIssues.push(Number(issue.slice(1))); - } else { - if (issue.split("/")[5] == "pull") { - linkedPrs.push(Number(issue.split("/")[6])); - } else linkedIssues.push(Number(issue.split("/")[6])); - } + const matched = matches.length; + + for (let i = 0; i < matched; i++) { + const match = matches[i]; + console.log(`match: ${match}`); + const issueNumber = match.includes("#") ? Number(match.slice(1)) : Number(match.split("/")[6]); + const issue = await getIssueByNumber(context, issueNumber); + const pull = await getPullByNumber(context, issueNumber); + + if (pull) { + linkedPRStreamlined.push({ + login: "system", + body: `=============== Pull Request #${pull.number}: ${pull.title} + ===============\n ${pull.body}}`, }); - } else { - logger.info(`No linked issues or prs found`); - } - - if (linkedPrs.length > 0) { - for (let i = 0; i < linkedPrs.length; i++) { - const pr = await getPullByNumber(context, linkedPrs[i]); - if (pr) { + const prComments = await getAllIssueComments(issueNumber); + const prCommentsRaw = await getAllIssueComments(issueNumber, "raw"); + prComments.forEach(async (comment, i) => { + if (comment.user.type == UserType.User || prCommentsRaw[i].body.includes("")) { linkedPRStreamlined.push({ - login: "system", - body: `=============== Pull Request #${pr.number}: ${pr.title} + ===============\n ${pr.body}}`, - }); - const prComments = await getAllIssueComments(linkedPrs[i]); - const prCommentsRaw = await getAllIssueComments(linkedPrs[i], "raw"); - prComments.forEach(async (comment, i) => { - if (comment.user.type == UserType.User || prCommentsRaw[i].body.includes("")) { - linkedPRStreamlined.push({ - login: comment.user.login, - body: comment.body, - }); - } + login: comment.user.login, + body: comment.body, }); } - } - } - - if (linkedIssues.length > 0) { - for (let i = 0; i < linkedIssues.length; i++) { - const issue = await getIssueByNumber(context, linkedIssues[i]); - if (issue) { + }); + } else { + if (!issue) continue; + linkedIssueStreamlined.push({ + login: "system", + body: `=============== Issue #${issue.number}: ${issue.title} + ===============\n ${issue.body} `, + }); + const issueComments = await getAllIssueComments(issueNumber); + const issueCommentsRaw = await getAllIssueComments(issueNumber, "raw"); + issueComments.forEach(async (comment, i) => { + if (comment.user.type == UserType.User || issueCommentsRaw[i].body.includes("")) { linkedIssueStreamlined.push({ - login: "system", - body: `=============== Issue #${issue.number}: ${issue.title} + ===============\n ${issue.body} `, - }); - const issueComments = await getAllIssueComments(linkedIssues[i]); - const issueCommentsRaw = await getAllIssueComments(linkedIssues[i], "raw"); - issueComments.forEach(async (comment, i) => { - if (comment.user.type == UserType.User || issueCommentsRaw[i].body.includes("")) { - linkedIssueStreamlined.push({ - login: comment.user.login, - body: comment.body, - }); - } + login: comment.user.login, + body: comment.body, }); } - } + }); } - - return { - linkedIssues: linkedIssueStreamlined, - linkedPrs: linkedPRStreamlined, - }; - } catch (error) { - logger.info(`Error getting linked issues or prs: ${error}`); - return `Error getting linked issues or prs: ${error}`; } + + return { + linkedIssues: linkedIssueStreamlined, + linkedPrs: linkedPRStreamlined, + }; } else { logger.info(`No matches found`); return { diff --git a/yarn.lock b/yarn.lock index 65e3ead1b..cb9ed887e 100644 --- a/yarn.lock +++ b/yarn.lock @@ -2997,6 +2997,11 @@ anymatch@^3.0.3, anymatch@~3.1.2: normalize-path "^3.0.0" picomatch "^2.0.4" +app-root-path@^3.1.0: + version "3.1.0" + resolved "https://registry.yarnpkg.com/app-root-path/-/app-root-path-3.1.0.tgz#5971a2fc12ba170369a7a1ef018c71e6e47c2e86" + integrity sha512-biN3PwB2gUtjaYy/isrU3aNWI5w+fAfvHkSvCKeQGxhmYpwKFUxudR3Yya+KqVRHBmEDYh+/lTozYCFbmzX4nA== + arg@^4.1.0: version "4.1.3" resolved "https://registry.yarnpkg.com/arg/-/arg-4.1.3.tgz#269fc7ad5b8e42cb63c896d5666017261c144089" @@ -3174,7 +3179,7 @@ base-64@^0.1.0: resolved "https://registry.yarnpkg.com/base-64/-/base-64-0.1.0.tgz#780a99c84e7d600260361511c4877613bf24f6bb" integrity sha512-Y5gU45svrR5tI2Vt/X9GPd3L0HNIKzGu202EjxrXMpuc2V2CiKgemAbUUsqYmZJvPtCXoUKjNZwBJzsNScUbXA== -base64-js@^1.3.1: +base64-js@^1.3.1, base64-js@^1.5.1: version "1.5.1" resolved "https://registry.yarnpkg.com/base64-js/-/base64-js-1.5.1.tgz#1b1b440160a5bf7ad40b650f095963481903930a" integrity sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA== @@ -6369,6 +6374,13 @@ js-sha3@0.8.0: resolved "https://registry.yarnpkg.com/js-sha3/-/js-sha3-0.8.0.tgz#b9b7a5da73afad7dedd0f8c463954cbde6818840" integrity sha512-gF1cRrHhIzNfToc802P800N8PpXS+evLLXfsVpowqmAFR9uwbi89WvXg2QspOmXL8QL86J4T1EpFu+yUkwJY3Q== +js-tiktoken@^1.0.7: + version "1.0.7" + resolved "https://registry.yarnpkg.com/js-tiktoken/-/js-tiktoken-1.0.7.tgz#56933fcd2093e8304060dfde3071bda91812e6f5" + integrity sha512-biba8u/clw7iesNEWLOLwrNGoBP2lA+hTaBLs/D45pJdUPFXyxD6nhcDVtADChghv4GgyAiMKYMiRx7x6h7Biw== + dependencies: + base64-js "^1.5.1" + js-tokens@^4.0.0: version "4.0.0" resolved "https://registry.yarnpkg.com/js-tokens/-/js-tokens-4.0.0.tgz#19203fb59991df98e3a287050d4647cdeaf32499" @@ -8414,6 +8426,13 @@ send@0.18.0: range-parser "~1.2.1" statuses "2.0.1" +sentencepiece-js@^1.1.0: + version "1.1.0" + resolved "https://registry.yarnpkg.com/sentencepiece-js/-/sentencepiece-js-1.1.0.tgz#d3cd7fe0e81f2662e8b50a5aa38565f5ca8f74bd" + integrity sha512-HN6teKCRO9tz37zbaNI3i+vMZ/JRWDt6kmZ7OVpzQv1jZHyYNmf5tE7CFpIYN86+y9TLB0cuscMdA3OHhT/MhQ== + dependencies: + app-root-path "^3.1.0" + serve-static@1.15.0: version "1.15.0" resolved "https://registry.yarnpkg.com/serve-static/-/serve-static-1.15.0.tgz#faaef08cffe0a1a62f60cad0c4e513cff0ac9540"