Feature(LLMLingua): add pre-commit check & unittest
iofu728 committed Feb 20, 2024
1 parent 36f61fa commit 5280596
Showing 24 changed files with 248 additions and 164 deletions.
2 changes: 1 addition & 1 deletion .github/ISSUE_TEMPLATE/bug_report.yml
@@ -44,4 +44,4 @@ body:
- Python Version: <!-- Specify the Python version (e.g., 3.8) -->
- Related Issues: <!-- Link to any related issues here (e.g., #1) -->
- Any other relevant information.
-placeholder: Any additional details
+placeholder: Any additional details
2 changes: 1 addition & 1 deletion .github/ISSUE_TEMPLATE/config.yml
@@ -1 +1 @@
-blank_issues_enabled: true
+blank_issues_enabled: true
2 changes: 1 addition & 1 deletion .github/ISSUE_TEMPLATE/feature_request.yml
@@ -23,4 +23,4 @@ body:
attributes:
label: Additional context
description: Add any other context or screenshots about the feature request here.
-placeholder: Any additional information
+placeholder: Any additional information
2 changes: 1 addition & 1 deletion .github/ISSUE_TEMPLATE/general_issue.yml
@@ -9,4 +9,4 @@ body:
attributes:
label: Describe the issue
description: A clear and concise description of what the question is.
-placeholder: The detail of question.
+placeholder: The detail of question.
2 changes: 1 addition & 1 deletion .github/workflows/release.yml
@@ -37,4 +37,4 @@ jobs:
TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
shell: pwsh
-run: twine upload dist/*
+run: twine upload dist/*
2 changes: 1 addition & 1 deletion .gitignore
@@ -400,4 +400,4 @@ FodyWeavers.xsd

# build
build/*
-dist/*
+dist/*
40 changes: 20 additions & 20 deletions .pre-commit-config.yaml
@@ -26,26 +26,26 @@ repos:
    rev: 23.3.0
    hooks:
      - id: black
-  - repo: https://github.com/charliermarsh/ruff-pre-commit
-    rev: v0.0.261
-    hooks:
-      - id: ruff
-        args: ["--fix"]
-  - repo: https://github.com/codespell-project/codespell
-    rev: v2.2.6
-    hooks:
-      - id: codespell
-        args: ["-L", "ans,linar,nam,"]
-        exclude: |
-          (?x)^(
-              pyproject.toml |
-              website/static/img/ag.svg |
-              website/yarn.lock |
-              notebook/.*
-          )$
+  # - repo: https://github.com/charliermarsh/ruff-pre-commit
+  #   rev: v0.0.261
+  #   hooks:
+  #     - id: ruff
+  #       args: ["--fix"]
+  # - repo: https://github.com/codespell-project/codespell
+  #   rev: v2.2.6
+  #   hooks:
+  #     - id: codespell
+  #       args: ["-L", "ans,linar,nam,"]
+  #       exclude: |
+  #         (?x)^(
+  #             pyproject.toml |
+  #             website/static/img/ag.svg |
+  #             website/yarn.lock |
+  #             notebook/.*
+  #         )$
  - repo: https://github.com/nbQA-dev/nbQA
    rev: 1.7.1
    hooks:
-      - id: nbqa-ruff
-        args: ["--fix"]
-      - id: nbqa-black
+      # - id: nbqa-ruff
+      #   args: ["--fix"]
+      - id: nbqa-black
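For contributors, hooks like these are typically enabled once with `pre-commit install` and run across the whole repository with `pre-commit run --all-files` (standard pre-commit CLI usage; not part of this diff).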
2 changes: 1 addition & 1 deletion DOCUMENT.md
@@ -145,7 +145,7 @@ recovered_response = llm_lingua.recover(

### Using phi-2

-Thanks to the efforts of the community, phi-2 is now available for use in LLMLingua.
+Thanks to the efforts of the community, phi-2 is now available for use in LLMLingua.

Before using it, please update your transformers to the GitHub version by running `pip install -U git+https://github.com/huggingface/transformers.git`.
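A rough sketch of the phi-2 setup described above (the Hugging Face model id `microsoft/phi-2` and the variable `long_prompt` are illustrative; the constructor and `compress_prompt` call mirror the API used elsewhere in this repository):

```python
# Sketch: prompt compression with phi-2 as the small model.
# Assumes transformers has been upgraded as described above.
from llmlingua import PromptCompressor

llm_lingua = PromptCompressor("microsoft/phi-2")

result = llm_lingua.compress_prompt(long_prompt, target_token=200)
print(result["compressed_prompt"])
```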

2 changes: 1 addition & 1 deletion Makefile
@@ -13,4 +13,4 @@ style:
flake8 $(CHECK_DIRS)

test:
-@${PYTHON} -m pytest -n auto --dist=loadfile -s -v ./tests/
+@${PYTHON} -m pytest -n auto --dist=loadfile -s -v ./tests/
26 changes: 13 additions & 13 deletions README.md
@@ -1,16 +1,16 @@
-<div style="display: flex; align-items: center;">
-<div style="width: 100px; margin-right: 10px; height:auto;" align="left">
-<img src="images/LLMLingua_logo.png" alt="LLMLingua" width="100" align="left">
-</div>
-<div style="flex-grow: 1;" align="center">
-<h2 align="center">(Long)LLMLingua: Enhancing Large Language Model Inference via Prompt Compression</h2>
-</div>
+<div style="display: flex; align-items: center;">
+<div style="width: 100px; margin-right: 10px; height:auto;" align="left">
+<img src="images/LLMLingua_logo.png" alt="LLMLingua" width="100" align="left">
+</div>
+<div style="flex-grow: 1;" align="center">
+<h2 align="center">(Long)LLMLingua: Enhancing Large Language Model Inference via Prompt Compression</h2>
+</div>
</div>

<p align="center">
-| <a href="https://llmlingua.com/"><b>Project Page</b></a> |
-<a href="https://arxiv.org/abs/2310.05736"><b>LLMLingua Paper</b></a> |
-<a href="https://arxiv.org/abs/2310.06839"><b>LongLLMLingua Paper</b></a> |
+| <a href="https://llmlingua.com/"><b>Project Page</b></a> |
+<a href="https://arxiv.org/abs/2310.05736"><b>LLMLingua Paper</b></a> |
+<a href="https://arxiv.org/abs/2310.06839"><b>LongLLMLingua Paper</b></a> |
<a href="https://huggingface.co/spaces/microsoft/LLMLingua"><b>HF Space Demo</b></a> |
</p>

@@ -102,7 +102,7 @@ To get started with (Long)LLMLingua, simply install it using pip:
```bash
pip install llmlingua
```

#### 2. **Using (Long)LLMLingua for Prompt Compression:**

With (Long)LLMLingua, you can easily compress your prompts. Here’s how you can do it:
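The full example is collapsed in this view; a minimal sketch of the call it introduces (`context_list` is an illustrative variable, and the keyword arguments mirror the `compress_prompt` usage shown in the notebook diff below):

```python
from llmlingua import PromptCompressor

llm_lingua = PromptCompressor()  # defaults to a small LLaMA-style base model

compressed_prompt = llm_lingua.compress_prompt(
    context_list,      # illustrative: documents/demonstrations to compress
    instruction="",    # instruction component, kept largely intact
    question="",       # question component, kept largely intact
    target_token=200,  # compression goal
)
# compressed_prompt["compressed_prompt"] holds the compressed text
```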
@@ -152,8 +152,8 @@ contact [[email protected]](mailto:[email protected]) with any additio

## Trademarks

-This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft
-trademarks or logos is subject to and must follow
+This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft
+trademarks or logos is subject to and must follow
[Microsoft's Trademark & Brand Guidelines](https://www.microsoft.com/en-us/legal/intellectualproperty/trademarks/usage/general).
Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship.
Any use of third-party trademarks or logos are subject to those third-party's policies.
2 changes: 1 addition & 1 deletion SECURITY.md
@@ -14,7 +14,7 @@ Instead, please report them to the Microsoft Security Response Center (MSRC) at

If you prefer to submit without logging in, send email to [[email protected]](mailto:[email protected]). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/opensource/security/pgpkey).

-You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://aka.ms/opensource/security/msrc).
+You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://aka.ms/opensource/security/msrc).

Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue:

8 changes: 4 additions & 4 deletions SUPPORT.md
@@ -1,13 +1,13 @@
# Support

-## How to file issues and get help
+## How to file issues and get help

-This project uses GitHub Issues to track bugs and feature requests. Please search the existing
-issues before filing new issues to avoid duplicates. For new issues, file your bug or
+This project uses GitHub Issues to track bugs and feature requests. Please search the existing
+issues before filing new issues to avoid duplicates. For new issues, file your bug or
feature request as a new Issue.

For help and questions about using this project, please refer the [document](./DOCUMENT.md).

-## Microsoft Support Policy
+## Microsoft Support Policy

Support for this **PROJECT or PRODUCT** is limited to the resources listed above.
42 changes: 21 additions & 21 deletions Transparency_FAQ.md
@@ -1,36 +1,36 @@
-# LLMLingua's Responsible AI FAQ
+# LLMLingua's Responsible AI FAQ

-## What is LLMLingua?
+## What is LLMLingua?

-- LLMLingua is a simple and efficient method to compress prompt up to 20x and keeping the original prompt knowledge like ICL, reasoning, etc.
-- LLMLingua takes user-defined prompts and compression goals as input, and outputs a compressed prompt, which may often result in a form of expression that is difficult for humans to understand.
+- LLMLingua is a simple and efficient method to compress prompt up to 20x and keeping the original prompt knowledge like ICL, reasoning, etc.
+- LLMLingua takes user-defined prompts and compression goals as input, and outputs a compressed prompt, which may often result in a form of expression that is difficult for humans to understand.

-## What can LLMLingua do?
+## What can LLMLingua do?

-- LLMLingua can simultaneously reduce the length of prompts and the output of LLMs (20%-30%), thus saving API calls;
-- Compressed prompts from LLMLingua can be directly used with black-box LLMs, such as ChatGPT, GPT-4, and Claude;
-- By compressing prompts, LLMLingua allows for more information to be included within the original token length, thereby improving model performance;
-- LLMLingua relies on a small language model, like GPT-2 or LLaMA-7b, for perplexity calculations, which is a relatively low-cost approach;
-- Compressed prompts generated by LLMLingua can be understood by LLMs, preserving their original capabilities in downstream tasks and keeping the original prompt knowledge like ICL, reasoning, etc. LLMs can also recover the essential information from the compressed prompts;
-- LLMLingua is a robustness method, no need any training for the LLMs;
-- Additionally, LLMLingua can be used to compress KV-Cache, which speeds up inference.
+- LLMLingua can simultaneously reduce the length of prompts and the output of LLMs (20%-30%), thus saving API calls;
+- Compressed prompts from LLMLingua can be directly used with black-box LLMs, such as ChatGPT, GPT-4, and Claude;
+- By compressing prompts, LLMLingua allows for more information to be included within the original token length, thereby improving model performance;
+- LLMLingua relies on a small language model, like GPT-2 or LLaMA-7b, for perplexity calculations, which is a relatively low-cost approach;
+- Compressed prompts generated by LLMLingua can be understood by LLMs, preserving their original capabilities in downstream tasks and keeping the original prompt knowledge like ICL, reasoning, etc. LLMs can also recover the essential information from the compressed prompts;
+- LLMLingua is a robustness method, no need any training for the LLMs;
+- Additionally, LLMLingua can be used to compress KV-Cache, which speeds up inference.

-## What is/are LLMLingua’s intended use(s)?
+## What is/are LLMLingua’s intended use(s)?

-- Users who call black-box LLM APIs similar to GPT-4, those who utilize ChatGPT to handle longer content, as well as model deployers and cloud service providers, can benefit from these techniques.
+- Users who call black-box LLM APIs similar to GPT-4, those who utilize ChatGPT to handle longer content, as well as model deployers and cloud service providers, can benefit from these techniques.

-## How was LLMLingua evaluated? What metrics are used to measure performance?
+## How was LLMLingua evaluated? What metrics are used to measure performance?

-- In our experiments, we conducted a detailed evaluation of the performance of compressed prompts across various tasks, particularly in those involving LLM-specific capabilities, such as In-Context Learning, reasoning tasks, summarization, and conversation tasks. We assessed our approach using compression ratio and performance loss as evaluation metrics.
+- In our experiments, we conducted a detailed evaluation of the performance of compressed prompts across various tasks, particularly in those involving LLM-specific capabilities, such as In-Context Learning, reasoning tasks, summarization, and conversation tasks. We assessed our approach using compression ratio and performance loss as evaluation metrics.

-## What are the limitations of LLMLingua? How can users minimize the impact of LLMLingua’s limitations when using the system?
+## What are the limitations of LLMLingua? How can users minimize the impact of LLMLingua’s limitations when using the system?

-- The potential harmful, false or biased responses using the compressed prompts would likely be unchanged. Thus using LLMLingua has no inherent benefits or risks when it comes to those types of responsible AI issues.
-- LLMLingua may struggle to perform well at particularly high compression ratios, especially when the original prompts are already quite short.
+- The potential harmful, false or biased responses using the compressed prompts would likely be unchanged. Thus using LLMLingua has no inherent benefits or risks when it comes to those types of responsible AI issues.
+- LLMLingua may struggle to perform well at particularly high compression ratios, especially when the original prompts are already quite short.

-## What operational factors and settings allow for effective and responsible use of LLMLingua?
+## What operational factors and settings allow for effective and responsible use of LLMLingua?

-- Users can set parameters such as the boundaries between different components (instruction, context, question) in the prompt, compression goals, and the small model used for compression calculations. Afterward, they can input the compressed prompt into black-box LLMs for use.
+- Users can set parameters such as the boundaries between different components (instruction, context, question) in the prompt, compression goals, and the small model used for compression calculations. Afterward, they can input the compressed prompt into black-box LLMs for use.
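Concretely, these parameters map onto the compressor API roughly as in the sketch below (variable names are illustrative; the parameters mirror the `compress_prompt` call used in this repository's examples):

```python
from llmlingua import PromptCompressor

# Small model used for the perplexity calculations (illustrative choice).
llm_lingua = PromptCompressor("NousResearch/Llama-2-7b-hf")

result = llm_lingua.compress_prompt(
    context_demos,                 # context component (most compressible)
    instruction=task_instruction,  # instruction boundary
    question=user_question,        # question boundary
    target_token=200,              # compression goal
)
# result["compressed_prompt"] is then passed to the black-box LLM
```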

## What is instruction, context, and question?

42 changes: 32 additions & 10 deletions examples/CoT.ipynb
@@ -147,6 +147,7 @@
"source": [
"# Download the original prompt and dataset\n",
"from datasets import load_dataset\n",
+"\n",
"!wget https://raw.githubusercontent.com/FranxYao/chain-of-thought-hub/main/gsm8k/lib_prompt/prompt_hardest.txt\n",
"prompt_complex = open(\"./prompt_hardest.txt\").read()\n",
"gsm8k = load_dataset(\"gsm8k\", \"main\")\n",
@@ -162,6 +163,7 @@
"source": [
"# Using the OAI\n",
"import openai\n",
+"\n",
"openai.api_key = \"<insert_openai_key>\""
]
},
@@ -174,10 +176,11 @@
"source": [
"# or Using the AOAI\n",
"import openai\n",
+"\n",
"openai.api_key = \"<insert_openai_key>\"\n",
"openai.api_base = \"https://xxxx.openai.azure.com/\"\n",
-"openai.api_type = 'azure'\n",
-"openai.api_version = '2023-05-15'"
+"openai.api_type = \"azure\"\n",
+"openai.api_version = \"2023-05-15\""
]
},
{
@@ -267,6 +270,7 @@
"source": [
"# The response from original prompt\n",
"import json\n",
+"\n",
"instruction = \"Please reference the following examples to answer the math question,\\n\"\n",
"prompt = instruction + prompt_complex + \"\\n\\nQuestion: \" + question\n",
"\n",
@@ -328,6 +332,7 @@
"source": [
"# Setup LLMLingua\n",
"from llmlingua import PromptCompressor\n",
+"\n",
"llm_lingua = PromptCompressor()"
]
},
@@ -382,7 +387,9 @@
")\n",
"\n",
"instruction = \"Please reference the following examples to answer the math question,\\n\"\n",
-"prompt = instruction + compressed_prompt[\"compressed_prompt\"] + \"\\n\\nQuestion: \" + question\n",
+"prompt = (\n",
+" instruction + compressed_prompt[\"compressed_prompt\"] + \"\\n\\nQuestion: \" + question\n",
+")\n",
"\n",
"request_data = {\n",
" \"prompt\": prompt,\n",
@@ -418,6 +425,7 @@
"source": [
"import re\n",
"\n",
+"\n",
"def extract_ans(ans_model):\n",
" ans_model = ans_model.split(\"\\n\")\n",
" ans = []\n",
@@ -431,6 +439,7 @@
" residual = \"\\n\".join(residual)\n",
" return ans, residual\n",
"\n",
+"\n",
"def parse_pred_ans(filename):\n",
" with open(filename) as fd:\n",
" lines = fd.readlines()\n",
@@ -506,6 +515,7 @@
"# Test in GSM8K test set\n",
"from tqdm import tqdm\n",
"import os\n",
+"\n",
"os.makedirs(\"outputs\", exist_ok=True)\n",
"i = 0\n",
"\n",
@@ -518,11 +528,20 @@
" iterative_size=100,\n",
")\n",
"\n",
-"for q, a in tqdm(zip(gsm8k_test['question'], gsm8k_test['answer']), \n",
-" total=len(gsm8k_test['question'])):\n",
-" instruction = \"Please reference the following examples to answer the math question,\\n\"\n",
-" prompt = instruction + compressed_prompt[\"compressed_prompt\"] + \"\\n\\nQuestion: \" + q + \"\\n\"\n",
-" \n",
+"for q, a in tqdm(\n",
+" zip(gsm8k_test[\"question\"], gsm8k_test[\"answer\"]), total=len(gsm8k_test[\"question\"])\n",
+"):\n",
+" instruction = (\n",
+" \"Please reference the following examples to answer the math question,\\n\"\n",
+" )\n",
+" prompt = (\n",
+" instruction\n",
+" + compressed_prompt[\"compressed_prompt\"]\n",
+" + \"\\n\\nQuestion: \"\n",
+" + q\n",
+" + \"\\n\"\n",
+" )\n",
+"\n",
" request_data = {\n",
" \"prompt\": prompt,\n",
" \"max_tokens\": 400,\n",
Expand All @@ -537,8 +556,11 @@
" )\n",
" ans_model = response[\"choices\"][0][\"text\"]\n",
" ans_, residual = extract_ans(ans_model)\n",
" with open('outputs/test_gpt_3.5_turbo_LLMLingua_174.txt', 'a') as fd:\n",
" fd.write(\"Q: %s\\nA_model:\\n%s\\nA:\\n%s\\n\\n\" % (q, ans_.replace(\"Q:\", \"\").replace(\"A:\", \"\"), a))\n",
" with open(\"outputs/test_gpt_3.5_turbo_LLMLingua_174.txt\", \"a\") as fd:\n",
" fd.write(\n",
" \"Q: %s\\nA_model:\\n%s\\nA:\\n%s\\n\\n\"\n",
" % (q, ans_.replace(\"Q:\", \"\").replace(\"A:\", \"\"), a)\n",
" )\n",
" i += 1"
]
},