From 528059608ad82d8116bc86aa94e1aee8a9cebecc Mon Sep 17 00:00:00 2001 From: Huiqiang Jiang Date: Tue, 20 Feb 2024 14:35:55 +0000 Subject: [PATCH] Feature(LLMLingua): add pre commit check & unittest --- .github/ISSUE_TEMPLATE/bug_report.yml | 2 +- .github/ISSUE_TEMPLATE/config.yml | 2 +- .github/ISSUE_TEMPLATE/feature_request.yml | 2 +- .github/ISSUE_TEMPLATE/general_issue.yml | 2 +- .github/workflows/release.yml | 2 +- .gitignore | 2 +- .pre-commit-config.yaml | 40 ++++----- DOCUMENT.md | 2 +- Makefile | 2 +- README.md | 26 +++--- SECURITY.md | 2 +- SUPPORT.md | 8 +- Transparency_FAQ.md | 42 ++++----- examples/CoT.ipynb | 42 ++++++--- examples/Code.ipynb | 27 ++++-- examples/OnlineMeeting.ipynb | 37 +++++--- examples/RAG.ipynb | 38 ++++++--- examples/RAGLlamaIndex.ipynb | 20 ++--- examples/Retrieval.ipynb | 99 ++++++++++++++-------- llmlingua/prompt_compressor.py | 9 +- setup.cfg | 2 +- setup.py | 2 +- tests/test_llmlingua.py | 1 - tests/test_longllmlingua.py | 1 - 24 files changed, 248 insertions(+), 164 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index 80f8c6e..d520d52 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -44,4 +44,4 @@ body: - Python Version: - Related Issues: - Any other relevant information. - placeholder: Any additional details \ No newline at end of file + placeholder: Any additional details diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index a49eab2..0086358 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -1 +1 @@ -blank_issues_enabled: true \ No newline at end of file +blank_issues_enabled: true diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml index 03994b3..5b7d7e8 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.yml +++ b/.github/ISSUE_TEMPLATE/feature_request.yml @@ -23,4 +23,4 @@ body: attributes: label: Additional context description: Add any other context or screenshots about the feature request here. - placeholder: Any additional information \ No newline at end of file + placeholder: Any additional information diff --git a/.github/ISSUE_TEMPLATE/general_issue.yml b/.github/ISSUE_TEMPLATE/general_issue.yml index 1aa6d79..f480312 100644 --- a/.github/ISSUE_TEMPLATE/general_issue.yml +++ b/.github/ISSUE_TEMPLATE/general_issue.yml @@ -9,4 +9,4 @@ body: attributes: label: Describe the issue description: A clear and concise description of what the question is. - placeholder: The detail of question. \ No newline at end of file + placeholder: The detail of question. 
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 8944a8a..687857a 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -37,4 +37,4 @@ jobs: TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} shell: pwsh - run: twine upload dist/* \ No newline at end of file + run: twine upload dist/* diff --git a/.gitignore b/.gitignore index b55e6a9..7508476 100644 --- a/.gitignore +++ b/.gitignore @@ -400,4 +400,4 @@ FodyWeavers.xsd # build build/* -dist/* \ No newline at end of file +dist/* diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 012b522..0aa896d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -26,26 +26,26 @@ repos: rev: 23.3.0 hooks: - id: black - - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.0.261 - hooks: - - id: ruff - args: ["--fix"] - - repo: https://github.com/codespell-project/codespell - rev: v2.2.6 - hooks: - - id: codespell - args: ["-L", "ans,linar,nam,"] - exclude: | - (?x)^( - pyproject.toml | - website/static/img/ag.svg | - website/yarn.lock | - notebook/.* - )$ + # - repo: https://github.com/charliermarsh/ruff-pre-commit + # rev: v0.0.261 + # hooks: + # - id: ruff + # args: ["--fix"] + # - repo: https://github.com/codespell-project/codespell + # rev: v2.2.6 + # hooks: + # - id: codespell + # args: ["-L", "ans,linar,nam,"] + # exclude: | + # (?x)^( + # pyproject.toml | + # website/static/img/ag.svg | + # website/yarn.lock | + # notebook/.* + # )$ - repo: https://github.com/nbQA-dev/nbQA rev: 1.7.1 hooks: - - id: nbqa-ruff - args: ["--fix"] - - id: nbqa-black \ No newline at end of file + # - id: nbqa-ruff + # args: ["--fix"] + - id: nbqa-black diff --git a/DOCUMENT.md b/DOCUMENT.md index d2ad221..c99c2dd 100644 --- a/DOCUMENT.md +++ b/DOCUMENT.md @@ -145,7 +145,7 @@ recovered_response = llm_lingua.recover( ### Using phi-2 -Thanks to the efforts of the community, phi-2 is now available for use in LLMLingua. +Thanks to the efforts of the community, phi-2 is now available for use in LLMLingua. Before using it, please update your transformers to the GitHub version by running `pip install -U git+https://github.com/huggingface/transformers.git`. diff --git a/Makefile b/Makefile index 8bd4d6b..b0d90d2 100644 --- a/Makefile +++ b/Makefile @@ -13,4 +13,4 @@ style: flake8 $(CHECK_DIRS) test: - @${PYTHON} -m pytest -n auto --dist=loadfile -s -v ./tests/ \ No newline at end of file + @${PYTHON} -m pytest -n auto --dist=loadfile -s -v ./tests/ diff --git a/README.md b/README.md index 79e89fb..656013f 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,16 @@ -
-    <div style="width: 100px; margin-right: 10px; height:auto;" align="left"> 
-        <img src="images/LLMLingua_logo.png" alt="LLMLingua" width="100" align="left"> 
-    </div> 
-    <div style="flex-grow: 1;" align="center"> 
-        <h2 align="center">(Long)LLMLingua: Enhancing Large Language Model Inference via Prompt Compression</h2> 
-    </div> 
-</div> 
+<div style="display: flex; align-items: center;">
+    <div style="width: 100px; margin-right: 10px; height:auto;" align="left">
+        <img src="images/LLMLingua_logo.png" alt="LLMLingua" width="100" align="left">
+    </div>
+    <div style="flex-grow: 1;" align="center">
+        <h2 align="center">(Long)LLMLingua: Enhancing Large Language Model Inference via Prompt Compression</h2>
+    </div>
+</div>
 
 <p align="center">
-    | <a href="https://llmlingua.com/"><b>Project Page</b></a> | 
-    <a href="https://arxiv.org/abs/2310.05736"><b>LLMLingua Paper</b></a> | 
-    <a href="https://arxiv.org/abs/2310.06839"><b>LongLLMLingua Paper</b></a> | 
+    | <a href="https://llmlingua.com/"><b>Project Page</b></a> |
+    <a href="https://arxiv.org/abs/2310.05736"><b>LLMLingua Paper</b></a> |
+    <a href="https://arxiv.org/abs/2310.06839"><b>LongLLMLingua Paper</b></a> |
     <a href="https://huggingface.co/spaces/microsoft/LLMLingua"><b>HF Space Demo</b></a> |
 </p>
@@ -102,7 +102,7 @@ To get started with (Long)LLMLingua, simply install it using pip: ```bash pip install llmlingua ``` - + #### 2. **Using (Long)LLMLingua for Prompt Compression:** With (Long)LLMLingua, you can easily compress your prompts. Here’s how you can do it: @@ -152,8 +152,8 @@ contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additio ## Trademarks -This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft -trademarks or logos is subject to and must follow +This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft +trademarks or logos is subject to and must follow [Microsoft's Trademark & Brand Guidelines](https://www.microsoft.com/en-us/legal/intellectualproperty/trademarks/usage/general). Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship. Any use of third-party trademarks or logos are subject to those third-party's policies. diff --git a/SECURITY.md b/SECURITY.md index e138ec5..9dc6316 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -14,7 +14,7 @@ Instead, please report them to the Microsoft Security Response Center (MSRC) at If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/opensource/security/pgpkey). -You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://aka.ms/opensource/security/msrc). +You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://aka.ms/opensource/security/msrc). Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: diff --git a/SUPPORT.md b/SUPPORT.md index 5a28ce6..effba63 100644 --- a/SUPPORT.md +++ b/SUPPORT.md @@ -1,13 +1,13 @@ # Support -## How to file issues and get help +## How to file issues and get help -This project uses GitHub Issues to track bugs and feature requests. Please search the existing -issues before filing new issues to avoid duplicates. For new issues, file your bug or +This project uses GitHub Issues to track bugs and feature requests. Please search the existing +issues before filing new issues to avoid duplicates. For new issues, file your bug or feature request as a new Issue. For help and questions about using this project, please refer the [document](./DOCUMENT.md). -## Microsoft Support Policy +## Microsoft Support Policy Support for this **PROJECT or PRODUCT** is limited to the resources listed above. diff --git a/Transparency_FAQ.md b/Transparency_FAQ.md index 9712c9d..74034e5 100644 --- a/Transparency_FAQ.md +++ b/Transparency_FAQ.md @@ -1,36 +1,36 @@ -# LLMLingua's Responsible AI FAQ +# LLMLingua's Responsible AI FAQ -## What is LLMLingua? +## What is LLMLingua? -- LLMLingua is a simple and efficient method to compress prompt up to 20x and keeping the original prompt knowledge like ICL, reasoning, etc. 
-- LLMLingua takes user-defined prompts and compression goals as input, and outputs a compressed prompt, which may often result in a form of expression that is difficult for humans to understand. +- LLMLingua is a simple and efficient method to compress prompt up to 20x and keeping the original prompt knowledge like ICL, reasoning, etc. +- LLMLingua takes user-defined prompts and compression goals as input, and outputs a compressed prompt, which may often result in a form of expression that is difficult for humans to understand. -## What can LLMLingua do? +## What can LLMLingua do? -- LLMLingua can simultaneously reduce the length of prompts and the output of LLMs (20%-30%), thus saving API calls; -- Compressed prompts from LLMLingua can be directly used with black-box LLMs, such as ChatGPT, GPT-4, and Claude; -- By compressing prompts, LLMLingua allows for more information to be included within the original token length, thereby improving model performance; -- LLMLingua relies on a small language model, like GPT-2 or LLaMA-7b, for perplexity calculations, which is a relatively low-cost approach; -- Compressed prompts generated by LLMLingua can be understood by LLMs, preserving their original capabilities in downstream tasks and keeping the original prompt knowledge like ICL, reasoning, etc. LLMs can also recover the essential information from the compressed prompts; -- LLMLingua is a robustness method, no need any training for the LLMs; -- Additionally, LLMLingua can be used to compress KV-Cache, which speeds up inference. +- LLMLingua can simultaneously reduce the length of prompts and the output of LLMs (20%-30%), thus saving API calls; +- Compressed prompts from LLMLingua can be directly used with black-box LLMs, such as ChatGPT, GPT-4, and Claude; +- By compressing prompts, LLMLingua allows for more information to be included within the original token length, thereby improving model performance; +- LLMLingua relies on a small language model, like GPT-2 or LLaMA-7b, for perplexity calculations, which is a relatively low-cost approach; +- Compressed prompts generated by LLMLingua can be understood by LLMs, preserving their original capabilities in downstream tasks and keeping the original prompt knowledge like ICL, reasoning, etc. LLMs can also recover the essential information from the compressed prompts; +- LLMLingua is a robustness method, no need any training for the LLMs; +- Additionally, LLMLingua can be used to compress KV-Cache, which speeds up inference. -## What is/are LLMLingua’s intended use(s)? +## What is/are LLMLingua’s intended use(s)? -- Users who call black-box LLM APIs similar to GPT-4, those who utilize ChatGPT to handle longer content, as well as model deployers and cloud service providers, can benefit from these techniques. +- Users who call black-box LLM APIs similar to GPT-4, those who utilize ChatGPT to handle longer content, as well as model deployers and cloud service providers, can benefit from these techniques. -## How was LLMLingua evaluated? What metrics are used to measure performance? +## How was LLMLingua evaluated? What metrics are used to measure performance? -- In our experiments, we conducted a detailed evaluation of the performance of compressed prompts across various tasks, particularly in those involving LLM-specific capabilities, such as In-Context Learning, reasoning tasks, summarization, and conversation tasks. We assessed our approach using compression ratio and performance loss as evaluation metrics. 
+- In our experiments, we conducted a detailed evaluation of the performance of compressed prompts across various tasks, particularly in those involving LLM-specific capabilities, such as In-Context Learning, reasoning tasks, summarization, and conversation tasks. We assessed our approach using compression ratio and performance loss as evaluation metrics. -## What are the limitations of LLMLingua? How can users minimize the impact of LLMLingua’s limitations when using the system? +## What are the limitations of LLMLingua? How can users minimize the impact of LLMLingua’s limitations when using the system? -- The potential harmful, false or biased responses using the compressed prompts would likely be unchanged. Thus using LLMLingua has no inherent benefits or risks when it comes to those types of responsible AI issues. -- LLMLingua may struggle to perform well at particularly high compression ratios, especially when the original prompts are already quite short. +- The potential harmful, false or biased responses using the compressed prompts would likely be unchanged. Thus using LLMLingua has no inherent benefits or risks when it comes to those types of responsible AI issues. +- LLMLingua may struggle to perform well at particularly high compression ratios, especially when the original prompts are already quite short. -## What operational factors and settings allow for effective and responsible use of LLMLingua? +## What operational factors and settings allow for effective and responsible use of LLMLingua? -- Users can set parameters such as the boundaries between different components (instruction, context, question) in the prompt, compression goals, and the small model used for compression calculations. Afterward, they can input the compressed prompt into black-box LLMs for use. +- Users can set parameters such as the boundaries between different components (instruction, context, question) in the prompt, compression goals, and the small model used for compression calculations. Afterward, they can input the compressed prompt into black-box LLMs for use. ## What is instruction, context, and question? 
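
The FAQ above notes that users set the boundaries between the prompt components (instruction, context, question), a compression goal, and the small model used for the perplexity computation, then feed the compressed prompt to a black-box LLM. A minimal sketch of that flow, using the same `PromptCompressor.compress_prompt` call the example notebooks below exercise — the placeholder strings and the `target_token` budget are illustrative assumptions, not values taken from this patch:

```python
# Minimal sketch of the operational flow described in the FAQ above.
# Placeholder strings and the target_token budget are illustrative assumptions.
from llmlingua import PromptCompressor

llm_lingua = PromptCompressor()  # defaults to a LLaMA-7b-class model for perplexity

compressed = llm_lingua.compress_prompt(
    context=["Demonstration 1: ...", "Demonstration 2: ..."],  # ICL examples / retrieved docs
    instruction="Please reference the following examples to answer the math question,",
    question="Question: ...",
    target_token=200,  # the compression goal
)

# The result carries the compressed text plus token accounting,
# ready to be sent to a black-box LLM such as GPT-4.
print(compressed["compressed_prompt"])
print(compressed["origin_tokens"], "->", compressed["compressed_tokens"])
```
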
diff --git a/examples/CoT.ipynb b/examples/CoT.ipynb index 9430631..336fed8 100644 --- a/examples/CoT.ipynb +++ b/examples/CoT.ipynb @@ -147,6 +147,7 @@ "source": [ "# Download the original prompt and dataset\n", "from datasets import load_dataset\n", + "\n", "!wget https://raw.githubusercontent.com/FranxYao/chain-of-thought-hub/main/gsm8k/lib_prompt/prompt_hardest.txt\n", "prompt_complex = open(\"./prompt_hardest.txt\").read()\n", "gsm8k = load_dataset(\"gsm8k\", \"main\")\n", @@ -162,6 +163,7 @@ "source": [ "# Using the OAI\n", "import openai\n", + "\n", "openai.api_key = \"\"" ] }, @@ -174,10 +176,11 @@ "source": [ "# or Using the AOAI\n", "import openai\n", + "\n", "openai.api_key = \"\"\n", "openai.api_base = \"https://xxxx.openai.azure.com/\"\n", - "openai.api_type = 'azure'\n", - "openai.api_version = '2023-05-15'" + "openai.api_type = \"azure\"\n", + "openai.api_version = \"2023-05-15\"" ] }, { @@ -267,6 +270,7 @@ "source": [ "# The response from original prompt\n", "import json\n", + "\n", "instruction = \"Please reference the following examples to answer the math question,\\n\"\n", "prompt = instruction + prompt_complex + \"\\n\\nQuestion: \" + question\n", "\n", @@ -328,6 +332,7 @@ "source": [ "# Setup LLMLingua\n", "from llmlingua import PromptCompressor\n", + "\n", "llm_lingua = PromptCompressor()" ] }, @@ -382,7 +387,9 @@ ")\n", "\n", "instruction = \"Please reference the following examples to answer the math question,\\n\"\n", - "prompt = instruction + compressed_prompt[\"compressed_prompt\"] + \"\\n\\nQuestion: \" + question\n", + "prompt = (\n", + " instruction + compressed_prompt[\"compressed_prompt\"] + \"\\n\\nQuestion: \" + question\n", + ")\n", "\n", "request_data = {\n", " \"prompt\": prompt,\n", @@ -418,6 +425,7 @@ "source": [ "import re\n", "\n", + "\n", "def extract_ans(ans_model):\n", " ans_model = ans_model.split(\"\\n\")\n", " ans = []\n", @@ -431,6 +439,7 @@ " residual = \"\\n\".join(residual)\n", " return ans, residual\n", "\n", + "\n", "def parse_pred_ans(filename):\n", " with open(filename) as fd:\n", " lines = fd.readlines()\n", @@ -506,6 +515,7 @@ "# Test in GSM8K test set\n", "from tqdm import tqdm\n", "import os\n", + "\n", "os.makedirs(\"outputs\", exist_ok=True)\n", "i = 0\n", "\n", @@ -518,11 +528,20 @@ " iterative_size=100,\n", ")\n", "\n", - "for q, a in tqdm(zip(gsm8k_test['question'], gsm8k_test['answer']), \n", - " total=len(gsm8k_test['question'])):\n", - " instruction = \"Please reference the following examples to answer the math question,\\n\"\n", - " prompt = instruction + compressed_prompt[\"compressed_prompt\"] + \"\\n\\nQuestion: \" + q + \"\\n\"\n", - " \n", + "for q, a in tqdm(\n", + " zip(gsm8k_test[\"question\"], gsm8k_test[\"answer\"]), total=len(gsm8k_test[\"question\"])\n", + "):\n", + " instruction = (\n", + " \"Please reference the following examples to answer the math question,\\n\"\n", + " )\n", + " prompt = (\n", + " instruction\n", + " + compressed_prompt[\"compressed_prompt\"]\n", + " + \"\\n\\nQuestion: \"\n", + " + q\n", + " + \"\\n\"\n", + " )\n", + "\n", " request_data = {\n", " \"prompt\": prompt,\n", " \"max_tokens\": 400,\n", @@ -537,8 +556,11 @@ " )\n", " ans_model = response[\"choices\"][0][\"text\"]\n", " ans_, residual = extract_ans(ans_model)\n", - " with open('outputs/test_gpt_3.5_turbo_LLMLingua_174.txt', 'a') as fd:\n", - " fd.write(\"Q: %s\\nA_model:\\n%s\\nA:\\n%s\\n\\n\" % (q, ans_.replace(\"Q:\", \"\").replace(\"A:\", \"\"), a))\n", + " with open(\"outputs/test_gpt_3.5_turbo_LLMLingua_174.txt\", \"a\") as 
fd:\n", + " fd.write(\n", + " \"Q: %s\\nA_model:\\n%s\\nA:\\n%s\\n\\n\"\n", + " % (q, ans_.replace(\"Q:\", \"\").replace(\"A:\", \"\"), a)\n", + " )\n", " i += 1" ] }, diff --git a/examples/Code.ipynb b/examples/Code.ipynb index 7a09e03..cd3e5da 100644 --- a/examples/Code.ipynb +++ b/examples/Code.ipynb @@ -115,7 +115,8 @@ "source": [ "# Download the original prompt and dataset\n", "from datasets import load_dataset\n", - "dataset = load_dataset('THUDM/LongBench', \"repobench-p\", split='test')" + "\n", + "dataset = load_dataset(\"THUDM/LongBench\", \"repobench-p\", split=\"test\")" ] }, { @@ -127,6 +128,7 @@ "source": [ "# Using the OAI\n", "import openai\n", + "\n", "openai.api_key = \"\"" ] }, @@ -139,10 +141,11 @@ "source": [ "# or Using the AOAI\n", "import openai\n", + "\n", "openai.api_key = \"\"\n", "openai.api_base = \"https://xxxx.openai.azure.com/\"\n", - "openai.api_type = 'azure'\n", - "openai.api_version = '2023-05-15'" + "openai.api_type = \"azure\"\n", + "openai.api_version = \"2023-05-15\"" ] }, { @@ -161,7 +164,9 @@ "outputs": [], "source": [ "# select an example from MeetingBank\n", - "contexts, question, answer = [dataset[1][key] for key in [\"context\", \"input\", \"answers\"]]\n", + "contexts, question, answer = [\n", + " dataset[1][key] for key in [\"context\", \"input\", \"answers\"]\n", + "]\n", "instruction = \"Please complete the code given below.\"\n", "question = question + \"\\n\\nNext line of code:\\n\"" ] @@ -233,6 +238,7 @@ "source": [ "# The response from original prompt, using GPT-4-32k\n", "import json\n", + "\n", "prompt = \"\\n\\n\".join([instruction, contexts, question])\n", "\n", "message = [\n", @@ -296,6 +302,7 @@ "source": [ "# Setup LLMLingua\n", "from llmlingua import PromptCompressor\n", + "\n", "llm_lingua = PromptCompressor()" ] }, @@ -307,7 +314,9 @@ "outputs": [], "source": [ "contexts_list = contexts.split(\"\\n\")\n", - "contexts_list = [\"\\n\".join(contexts_list[ii: ii + 4]) for ii in range(0, len(contexts_list), 4)]" + "contexts_list = [\n", + " \"\\n\".join(contexts_list[ii : ii + 4]) for ii in range(0, len(contexts_list), 4)\n", + "]" ] }, { @@ -359,12 +368,12 @@ " question=question,\n", " target_token=2000,\n", " condition_compare=True,\n", - " condition_in_question='after',\n", - " rank_method='longllmlingua',\n", + " condition_in_question=\"after\",\n", + " rank_method=\"longllmlingua\",\n", " use_sentence_level_filter=False,\n", " context_budget=\"+100\",\n", - " dynamic_context_compression_ratio=0.4, # enable dynamic_context_compression_ratio\n", - " reorder_context=\"sort\"\n", + " dynamic_context_compression_ratio=0.4, # enable dynamic_context_compression_ratio\n", + " reorder_context=\"sort\",\n", ")\n", "message = [\n", " {\"role\": \"user\", \"content\": compressed_prompt[\"compressed_prompt\"]},\n", diff --git a/examples/OnlineMeeting.ipynb b/examples/OnlineMeeting.ipynb index 48bb81e..016d292 100644 --- a/examples/OnlineMeeting.ipynb +++ b/examples/OnlineMeeting.ipynb @@ -115,6 +115,7 @@ "source": [ "# Download the original prompt and dataset\n", "from datasets import load_dataset\n", + "\n", "dataset = load_dataset(\"lytang/MeetingBank-transcript\")[\"train\"]" ] }, @@ -127,6 +128,7 @@ "source": [ "# Using the OAI\n", "import openai\n", + "\n", "openai.api_key = \"\"" ] }, @@ -139,10 +141,11 @@ "source": [ "# or Using the AOAI\n", "import openai\n", + "\n", "openai.api_key = \"\"\n", "openai.api_base = \"https://xxxx.openai.azure.com/\"\n", - "openai.api_type = 'azure'\n", - "openai.api_version = '2023-05-15'" + 
"openai.api_type = \"azure\"\n", + "openai.api_version = \"2023-05-15\"" ] }, { @@ -220,6 +223,7 @@ "source": [ "# The response from original prompt, using GPT-4-32k\n", "import json\n", + "\n", "prompt = \"\\n\\n\".join([contexts, question])\n", "\n", "message = [\n", @@ -275,6 +279,7 @@ "source": [ "# Setup LLMLingua\n", "from llmlingua import PromptCompressor\n", + "\n", "llm_lingua = PromptCompressor()" ] }, @@ -327,12 +332,12 @@ " question=question,\n", " target_token=200,\n", " condition_compare=True,\n", - " condition_in_question='after',\n", - " rank_method='longllmlingua',\n", + " condition_in_question=\"after\",\n", + " rank_method=\"longllmlingua\",\n", " use_sentence_level_filter=False,\n", " context_budget=\"+100\",\n", - " dynamic_context_compression_ratio=0.4, # enable dynamic_context_compression_ratio\n", - " reorder_context=\"sort\"\n", + " dynamic_context_compression_ratio=0.4, # enable dynamic_context_compression_ratio\n", + " reorder_context=\"sort\",\n", ")\n", "message = [\n", " {\"role\": \"user\", \"content\": compressed_prompt[\"compressed_prompt\"]},\n", @@ -411,6 +416,7 @@ "source": [ "# The response from original prompt, using GPT-4-32k\n", "import json\n", + "\n", "prompt = \"\\n\\n\".join([contexts, question])\n", "\n", "message = [\n", @@ -481,11 +487,11 @@ " question=question,\n", " target_token=200,\n", " condition_compare=True,\n", - " condition_in_question='after',\n", - " rank_method='longllmlingua',\n", + " condition_in_question=\"after\",\n", + " rank_method=\"longllmlingua\",\n", " use_sentence_level_filter=True,\n", " context_budget=\"+100\",\n", - " reorder_context=\"sort\"\n", + " reorder_context=\"sort\",\n", ")\n", "message = [\n", " {\"role\": \"user\", \"content\": compressed_prompt[\"compressed_prompt\"]},\n", @@ -523,7 +529,9 @@ "metadata": {}, "outputs": [], "source": [ - "question = \"Question: what are the arrangements the Police Department will make this year?\"\n", + "question = (\n", + " \"Question: what are the arrangements the Police Department will make this year?\"\n", + ")\n", "reference = \"enhancing community engagement and internal communication models, building a culture of accountability and transparency, and prioritizing recruitment and retention.\"" ] }, @@ -564,6 +572,7 @@ "source": [ "# The response from original prompt, using GPT-4-32k\n", "import json\n", + "\n", "prompt = \"\\n\\n\".join([contexts, question])\n", "\n", "message = [\n", @@ -634,12 +643,12 @@ " question=question,\n", " target_token=2000,\n", " condition_compare=True,\n", - " condition_in_question='after',\n", - " rank_method='longllmlingua',\n", + " condition_in_question=\"after\",\n", + " rank_method=\"longllmlingua\",\n", " use_sentence_level_filter=False,\n", " context_budget=\"+100\",\n", - " dynamic_context_compression_ratio=0.4, # enable dynamic_context_compression_ratio\n", - " reorder_context=\"sort\"\n", + " dynamic_context_compression_ratio=0.4, # enable dynamic_context_compression_ratio\n", + " reorder_context=\"sort\",\n", ")\n", "message = [\n", " {\"role\": \"user\", \"content\": compressed_prompt[\"compressed_prompt\"]},\n", diff --git a/examples/RAG.ipynb b/examples/RAG.ipynb index f585f97..827f418 100644 --- a/examples/RAG.ipynb +++ b/examples/RAG.ipynb @@ -131,6 +131,7 @@ "source": [ "# Using the OAI\n", "import openai\n", + "\n", "openai.api_key = \"\"" ] }, @@ -143,10 +144,11 @@ "source": [ "# or Using the AOAI\n", "import openai\n", + "\n", "openai.api_key = \"\"\n", "openai.api_base = \"https://xxxx.openai.azure.com/\"\n", - 
"openai.api_type = 'azure'\n", - "openai.api_version = '2023-05-15'" + "openai.api_type = \"azure\"\n", + "openai.api_version = \"2023-05-15\"" ] }, { @@ -178,7 +180,6 @@ "from tqdm import tqdm\n", "from lost_in_the_middle.prompting import (\n", " Document,\n", - " get_closedbook_qa_prompt,\n", " get_qa_prompt,\n", ")\n", "\n", @@ -202,7 +203,15 @@ " c = prompt.split(\"\\n\\n\")\n", " instruction, question = c[0], c[-1]\n", " demonstration = \"\\n\".join(c[1:-1])\n", - " datasets.append({\"id\": ii, \"instruction\": instruction, \"demonstration\": demonstration, \"question\": question, \"answer\": input_example[\"answers\"]})" + " datasets.append(\n", + " {\n", + " \"id\": ii,\n", + " \"instruction\": instruction,\n", + " \"demonstration\": demonstration,\n", + " \"question\": question,\n", + " \"answer\": input_example[\"answers\"],\n", + " }\n", + " )" ] }, { @@ -213,7 +222,9 @@ "outputs": [], "source": [ "# select an example from NaturalQuestions\n", - "instruction, demonstration_str, question, answer = [datasets[23][key] for key in [\"instruction\", \"demonstration\", \"question\", \"answer\"]]" + "instruction, demonstration_str, question, answer = [\n", + " datasets[23][key] for key in [\"instruction\", \"demonstration\", \"question\", \"answer\"]\n", + "]" ] }, { @@ -345,6 +356,7 @@ "source": [ "# Setup LLMLingua\n", "from llmlingua import PromptCompressor\n", + "\n", "llm_lingua = PromptCompressor()" ] }, @@ -397,12 +409,12 @@ " question=question,\n", " target_token=500,\n", " condition_compare=True,\n", - " condition_in_question='after',\n", - " rank_method='longllmlingua',\n", + " condition_in_question=\"after\",\n", + " rank_method=\"longllmlingua\",\n", " use_sentence_level_filter=False,\n", " context_budget=\"+100\",\n", - " dynamic_context_compression_ratio=0.4, # enable dynamic_context_compression_ratio\n", - " reorder_context=\"sort\"\n", + " dynamic_context_compression_ratio=0.4, # enable dynamic_context_compression_ratio\n", + " reorder_context=\"sort\",\n", ")\n", "message = [\n", " {\"role\": \"user\", \"content\": compressed_prompt[\"compressed_prompt\"]},\n", @@ -474,12 +486,12 @@ " question=question,\n", " target_token=100,\n", " condition_compare=True,\n", - " condition_in_question='after',\n", - " rank_method='longllmlingua',\n", + " condition_in_question=\"after\",\n", + " rank_method=\"longllmlingua\",\n", " use_sentence_level_filter=False,\n", " context_budget=\"+100\",\n", - " dynamic_context_compression_ratio=0.4, # enable dynamic_context_compression_ratio\n", - " reorder_context=\"sort\"\n", + " dynamic_context_compression_ratio=0.4, # enable dynamic_context_compression_ratio\n", + " reorder_context=\"sort\",\n", ")\n", "message = [\n", " {\"role\": \"user\", \"content\": compressed_prompt[\"compressed_prompt\"]},\n", diff --git a/examples/RAGLlamaIndex.ipynb b/examples/RAGLlamaIndex.ipynb index 2a007fb..56c56d4 100644 --- a/examples/RAGLlamaIndex.ipynb +++ b/examples/RAGLlamaIndex.ipynb @@ -13,8 +13,8 @@ "id": "05d999bc-83a3-454f-a8a4-44cbff1fcedc", "metadata": {}, "source": [ - "\r\n", - " \"Open\r\n", + "\n", + " \"Open\n", "" ] }, @@ -198,6 +198,7 @@ "source": [ "# Using the OAI\n", "import openai\n", + "\n", "openai.api_key = \"\"" ] }, @@ -210,10 +211,11 @@ "source": [ "# or Using the AOAI\n", "import openai\n", + "\n", "openai.api_key = \"\"\n", "openai.api_base = \"https://xxxx.openai.azure.com/\"\n", - "openai.api_type = 'azure'\n", - "openai.api_version = '2023-05-15'" + "openai.api_type = \"azure\"\n", + "openai.api_version = \"2023-05-15\"" ] }, { 
@@ -271,8 +273,6 @@ "from llama_index import (\n", " VectorStoreIndex,\n", " SimpleDirectoryReader,\n", - " load_index_from_storage,\n", - " StorageContext,\n", ")" ] }, @@ -284,9 +284,7 @@ "outputs": [], "source": [ "# load documents\n", - "documents = SimpleDirectoryReader(\n", - " input_files=[\"paul_graham_essay.txt\"]\n", - ").load_data()" + "documents = SimpleDirectoryReader(input_files=[\"paul_graham_essay.txt\"]).load_data()" ] }, { @@ -518,7 +516,7 @@ "\n", "Original Tokens: 10719\n", "Compressed Tokens: 308\n", - "Comressed Ratio: 34.80x\n" + "Compressed Ratio: 34.80x\n" ] } ], @@ -533,7 +531,7 @@ "print()\n", "print(\"Original Tokens:\", original_tokens)\n", "print(\"Compressed Tokens:\", compressed_tokens)\n", - "print(\"Comressed Ratio:\", f\"{original_tokens/(compressed_tokens + 1e-5):.2f}x\")" + "print(\"Compressed Ratio:\", f\"{original_tokens/(compressed_tokens + 1e-5):.2f}x\")" ] }, { diff --git a/examples/Retrieval.ipynb b/examples/Retrieval.ipynb index d5fe2c1..b960a23 100644 --- a/examples/Retrieval.ipynb +++ b/examples/Retrieval.ipynb @@ -144,7 +144,7 @@ "id": "f8676ffa-5117-44dc-9742-bb9ab1d56e0c", "metadata": {}, "source": [ - "## Evaulate the Reranker Performance" + "## Evaluate the Reranker Performance" ] }, { @@ -156,6 +156,7 @@ "source": [ "# Setup LLMLingua\n", "from llmlingua import PromptCompressor\n", + "\n", "llm_lingua = PromptCompressor()" ] }, @@ -174,10 +175,10 @@ "\n", "from lost_in_the_middle.prompting import (\n", " Document,\n", - " get_closedbook_qa_prompt,\n", " get_qa_prompt,\n", ")\n", "\n", + "\n", "def get_reranker_results(rank_method):\n", " path = \"lost-in-the-middle/qa_data/20_total_documents/nq-open-20_total_documents_gold_at_9.jsonl.gz\"\n", " d_idx = 9\n", @@ -189,31 +190,55 @@ " documents = []\n", " for ctx in deepcopy(input_example[\"ctxs\"]):\n", " documents.append(Document.from_dict(ctx))\n", - " \n", + "\n", " prompt = get_qa_prompt(\n", " question,\n", " documents,\n", " mention_random_ordering=False,\n", " query_aware_contextualization=False,\n", " )\n", - " \n", + "\n", " c = prompt.split(\"\\n\\n\")\n", " instruction, question = c[0], c[-1]\n", " demonstration = \"\\n\".join(c[1:-1])\n", " corpus = demonstration.split(\"\\n\")\n", - " \n", - " idx = llm_lingua.get_rank_results(corpus, question, rank_method, \"none\" if rank_method == \"llmlingua\" else \"after\", [0] * 20)\n", + "\n", + " idx = llm_lingua.get_rank_results(\n", + " corpus,\n", + " question,\n", + " rank_method,\n", + " \"none\" if rank_method == \"llmlingua\" else \"after\",\n", + " [0] * 20,\n", + " )\n", " idx = [ii[0] for ii in idx].index(d_idx)\n", " res.append(idx)\n", " logs = [rank_method]\n", " for idx in range(1, 21):\n", " acc = len([ii for ii in res if ii < idx]) / len(res) * 100\n", - " print(\"R@{},{:.2f}\".format(idx, len([ii for ii in res if ii < idx]) / len(res) * 100))\n", + " print(\n", + " \"R@{},{:.2f}\".format(\n", + " idx, len([ii for ii in res if ii < idx]) / len(res) * 100\n", + " )\n", + " )\n", " logs.append(\"{:.2f}\".format(acc))\n", " with open(\"retrieval.csv\", \"a\") as f:\n", " f.write(\",\".join(logs) + \"\\n\")\n", "\n", - "for rank_method in [\"bm25\", \"gzip\", \"sentbert\", \"openai\", \"bge\", \"bge_reranker\", \"bge_llmembedder\", \"jinza\", \"voyageai\", \"cohere\", \"llmlingua\", \"longllmlingua\"]:\n", + "\n", + "for rank_method in [\n", + " \"bm25\",\n", + " \"gzip\",\n", + " \"sentbert\",\n", + " \"openai\",\n", + " \"bge\",\n", + " \"bge_reranker\",\n", + " \"bge_llmembedder\",\n", + " \"jinza\",\n", + " 
\"voyageai\",\n", + " \"cohere\",\n", + " \"llmlingua\",\n", + " \"longllmlingua\",\n", + "]:\n", " get_reranker_results(rank_method)" ] }, @@ -234,7 +259,9 @@ "source": [ "recall_str = open(\"retrieval.csv\").read()\n", "recall_list = [ii.split(\"\\t\") for ii in recall_str.split(\"\\n\\n\")]\n", - "recall_list_data = [[ii[0], j + 1, float(k)] for ii in recall_list for j, k in enumerate(ii[1:])]" + "recall_list_data = [\n", + " [ii[0], j + 1, float(k)] for ii in recall_list for j, k in enumerate(ii[1:])\n", + "]" ] }, { @@ -255,52 +282,58 @@ } ], "source": [ - "import joblib\n", "import pandas as pd\n", "import seaborn as sns\n", "import matplotlib.pyplot as plt\n", - "import math\n", - "import os\n", - "import numpy as np\n", - "import pandas as pd\n", - "import matplotlib.pyplot as plt\n", - "from matplotlib.patches import Patch\n", - "from matplotlib.lines import Line2D\n", "\n", "sns.set_theme(\"poster\", style=\"darkgrid\", font_scale=1.2)\n", - "plt.rcParams['pdf.fonttype'] = 42\n", - "plt.rcParams['ps.fonttype'] = 42\n", + "plt.rcParams[\"pdf.fonttype\"] = 42\n", + "plt.rcParams[\"ps.fonttype\"] = 42\n", "\n", "# plt.rc('font', family=\"Times New Roman\", size=30) #controls default text size\n", - "plt.rc('axes', titlesize=40) #fontsize of the title\n", - "plt.rc('axes', labelsize=40) #fontsize of the x and y labels\n", - "plt.rc('xtick', labelsize=40) #fontsize of the x tick labels\n", - "plt.rc('ytick', labelsize=40) #fontsize of the y tick labels\n", - "plt.rc('legend', fontsize=25) #fontsize of the legend\n", + "plt.rc(\"axes\", titlesize=40) # fontsize of the title\n", + "plt.rc(\"axes\", labelsize=40) # fontsize of the x and y labels\n", + "plt.rc(\"xtick\", labelsize=40) # fontsize of the x tick labels\n", + "plt.rc(\"ytick\", labelsize=40) # fontsize of the y tick labels\n", + "plt.rc(\"legend\", fontsize=25) # fontsize of the legend\n", "\n", - "plt.figure(figsize=(14,10))\n", + "plt.figure(figsize=(14, 10))\n", "pq = pd.DataFrame(recall_list_data)\n", "pq.columns = [\"Method\", \"k\", \"R@k\"]\n", "\n", - "methods = [\"LLMLingua\", \"BM25\", \"OpenAI\", \"Voageai\", \"BPE-large-en v1.5\", \"SBERT\", \"Gzip\", \"Cohere-Rerank\", \"BGE-llmembeder\",\"Jina\", \"LongLLMLingua $r_k$ \\nw/o restrict\", \"BGE-Ranker-large\",\"LongLLMLingua $r_k$\"]\n", + "methods = [\n", + " \"LLMLingua\",\n", + " \"BM25\",\n", + " \"OpenAI\",\n", + " \"Voageai\",\n", + " \"BPE-large-en v1.5\",\n", + " \"SBERT\",\n", + " \"Gzip\",\n", + " \"Cohere-Rerank\",\n", + " \"BGE-llmembeder\",\n", + " \"Jina\",\n", + " \"LongLLMLingua $r_k$ \\nw/o restrict\",\n", + " \"BGE-Ranker-large\",\n", + " \"LongLLMLingua $r_k$\",\n", + "]\n", "colors = [\"4\", \"1\", \"2\", \"2\", \"2\", \"2\", \"1\", \"3\", \"3\", \"2\", \"5\", \"3\", \"5\"]\n", - "markers = ['>', '^', 's', 'o', \"<\", \".\", \"*\", '>', '^', 's', 'o', \"<\", \".\", \"*\"]\n", + "markers = [\">\", \"^\", \"s\", \"o\", \"<\", \".\", \"*\", \">\", \"^\", \"s\", \"o\", \"<\", \".\", \"*\"]\n", "for m, c, ma in zip(methods, colors, markers):\n", " plt.plot(\n", " pq[pq[\"Method\"] == m][\"k\"],\n", - " pq[pq[\"Method\"] == m]['R@k'],\n", + " pq[pq[\"Method\"] == m][\"R@k\"],\n", " alpha=0.65,\n", - " color=f'C{c}',\n", - "# marker=ma,\n", + " color=f\"C{c}\",\n", + " # marker=ma,\n", " label=m,\n", " linewidth=7,\n", " markersize=5 if ma == \"*\" else 5,\n", " )\n", "\n", - "plt.xlabel('Number of Retained Documents')\n", - "plt.ylabel(f'Recall(%)')\n", + "plt.xlabel(\"Number of Retained Documents\")\n", + "plt.ylabel(\"Recall(%)\")\n", 
"plt.xticks([1, 5, 10, 15, 20], labels=[\"1\", \"5\", \"10\", \"15\", \"20\"])\n", - "plt.legend(loc='lower right')\n", + "plt.legend(loc=\"lower right\")\n", "plt.tight_layout()\n", "# plt.savefig(\"retrieval.pdf\", dpi=1000)\n", "\n", diff --git a/llmlingua/prompt_compressor.py b/llmlingua/prompt_compressor.py index 045f57f..c9bfa1b 100644 --- a/llmlingua/prompt_compressor.py +++ b/llmlingua/prompt_compressor.py @@ -266,9 +266,12 @@ def structured_compress_prompt( ) else: rate = target_token / sum(context_tokens_length) - context, context_segs, context_segs_rate, context_segs_compress = ( - self.segment_structured_context(context, rate) - ) + ( + context, + context_segs, + context_segs_rate, + context_segs_compress, + ) = self.segment_structured_context(context, rate) return self.compress_prompt( context, instruction, diff --git a/setup.cfg b/setup.cfg index 21266a7..d9f2745 100644 --- a/setup.cfg +++ b/setup.cfg @@ -25,4 +25,4 @@ use_parentheses = True [flake8] ignore = E203, E501, E741, W503, W605 -max-line-length = 119 \ No newline at end of file +max-line-length = 119 diff --git a/setup.py b/setup.py index eac19b6..4f81a50 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,6 @@ # Copyright (c) 2023 Microsoft # Licensed under The MIT License [see LICENSE for details] - + from setuptools import find_packages, setup # PEP0440 compatible formatted version, see: diff --git a/tests/test_llmlingua.py b/tests/test_llmlingua.py index 687540e..3daf3a4 100644 --- a/tests/test_llmlingua.py +++ b/tests/test_llmlingua.py @@ -1,5 +1,4 @@ import unittest -import unittest.mock as mock from llmlingua import PromptCompressor diff --git a/tests/test_longllmlingua.py b/tests/test_longllmlingua.py index 83808ea..6dccd5f 100644 --- a/tests/test_longllmlingua.py +++ b/tests/test_longllmlingua.py @@ -1,5 +1,4 @@ import unittest -import unittest.mock as mock from llmlingua import PromptCompressor