
Commit

Merge pull request #25 from homebrewltd/CI-CD/bach
Ci cd/bach
tikikun authored Aug 5, 2024
2 parents f729efe + 8a2b8d2 commit 4584dca
Showing 4 changed files with 35 additions and 6 deletions.
32 changes: 29 additions & 3 deletions .github/workflows/test-models.yml
@@ -1,4 +1,4 @@
-name: Test - Models
+name: Test and Benchmark Models
 on:
   workflow_dispatch:
     inputs:
@@ -17,9 +17,14 @@ on:
         required: false
         default: "--mode audio --num_rows 5"
         type: string
+      run_benchmark:
+        description: 'Run benchmark test'
+        required: false
+        default: false
+        type: boolean

 jobs:
-  run-test:
+  run-test-and-benchmark:
     runs-on: research
     steps:
       - name: Checkout
@@ -34,4 +39,25 @@ jobs:
       - name: Run tests
         working-directory: ./tests
         run: |
-          python3 test_case.py --model_dir ${{ github.event.inputs.model_id }} --data_dir ${{ github.event.inputs.dataset_id }} ${{ github.event.inputs.extra_args }}
+          python3 test_case.py --model_dir ${{ github.event.inputs.model_id }} --data_dir ${{ github.event.inputs.dataset_id }} ${{ github.event.inputs.extra_args }}
+      - name: Install benchmark dependencies
+        if: ${{ github.event.inputs.run_benchmark == 'true' }}
+        working-directory: ./lm-evaluation-harness
+        run: |
+          pip3 install -e .
+          pip3 install lm_eval[vllm]
+      - name: Run benchmark
+        if: ${{ github.event.inputs.run_benchmark == 'true' }}
+        working-directory: ./lm-evaluation-harness
+        run: |
+          chmod +x ./run_benchmark.sh
+          ./run_benchmark.sh ${{ github.event.inputs.model_id }}
+      - name: Upload benchmark results
+        if: ${{ github.event.inputs.run_benchmark == 'true' }}
+        uses: actions/upload-artifact@v2
+        with:
+          name: benchmark-results
+          path: ./lm-evaluation-harness/benchmark_results/*.json
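The benchmark steps are gated on the new run_benchmark input, so an ordinary run of the workflow behaves as before. As a minimal sketch (the model and dataset IDs below are placeholders, not values taken from this commit), a manually dispatched run with benchmarking enabled could look like this with the GitHub CLI:

    gh workflow run test-models.yml \
      -f model_id=<org/model> \
      -f dataset_id=<org/dataset> \
      -f run_benchmark=true

Note that workflow_dispatch inputs arrive in the github.event payload as strings, which is why the if: conditions compare github.event.inputs.run_benchmark against the string 'true' even though the input is declared as a boolean.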
3 changes: 3 additions & 0 deletions .gitmodules
@@ -0,0 +1,3 @@
+[submodule "lm-evaluation-harness"]
+	path = lm-evaluation-harness
+	url = git@github.com:homebrewltd/lm-evaluation-harness.git
1 change: 1 addition & 0 deletions lm-evaluation-harness
Submodule lm-evaluation-harness added at 58b0b0
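Since lm-evaluation-harness now comes in as a submodule pinned at 58b0b0 rather than as vendored files, a plain checkout of this commit leaves that directory empty. Anyone reproducing the CI run locally would need to initialize it first, for example:

    git submodule update --init lm-evaluation-harness

(or clone the parent repository with --recurse-submodules). The .gitmodules entry uses the SSH URL form, so this assumes SSH access to the homebrewltd fork.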
5 changes: 2 additions & 3 deletions tests/test_case.py
@@ -114,8 +114,8 @@ def setUpClass(cls):
         else:
             print(f"Found {model_save_dir}. Skipping download.")
         # Model loading using vllm
-        cls.tokenizer = AutoTokenizer.from_pretrained(model_save_dir)
-        cls.llm = LLM(model_save_dir, tokenizer=model_save_dir, gpu_memory_utilization=0.6)
+        cls.tokenizer = AutoTokenizer.from_pretrained(model_dir)
+        cls.llm = LLM(model_dir, tokenizer=model_dir, gpu_memory_utilization=0.3)

         # Load dataset
         data_save_dir = os.path.join(args.cache_dir, args.data_dir)
@@ -150,7 +150,6 @@ def vllm_qna_inference(self, sample_id):
         text_input_str = self.dataset[sample_id]['prompt']
         expected_answer_str = self.dataset[sample_id]['answer']
         question_str = self.tokenizer.apply_chat_template([text_input_str], tokenize=False, add_generation_prompt=True)
-
         outputs = self.llm.generate(question_str, self.sampling_params)
         output_based_on_question = outputs[0].outputs[0].text
         output_token_ids = outputs[0].outputs[0].token_ids
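The test now hands model_dir (the value passed via --model_dir) straight to vLLM instead of the locally assembled model_save_dir, and lowers gpu_memory_utilization from 0.6 to 0.3, presumably to leave headroom for the benchmark step on the same runner. A minimal standalone sketch of that loading path, with a placeholder model ID and illustrative sampling settings (neither is taken from test_case.py):

    from transformers import AutoTokenizer
    from vllm import LLM, SamplingParams

    model_dir = "org/some-model"  # placeholder; CI passes github.event.inputs.model_id here
    tokenizer = AutoTokenizer.from_pretrained(model_dir)
    # Cap vLLM at 30% of GPU memory, mirroring the change in setUpClass.
    llm = LLM(model_dir, tokenizer=model_dir, gpu_memory_utilization=0.3)

    # Wrap a question with the chat template, then generate (standard messages
    # format here; test_case.py passes the raw prompt list from its dataset).
    prompt = tokenizer.apply_chat_template(
        [{"role": "user", "content": "What is 2 + 2?"}],
        tokenize=False,
        add_generation_prompt=True,
    )
    outputs = llm.generate(prompt, SamplingParams(temperature=0.0, max_tokens=64))
    print(outputs[0].outputs[0].text)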
