From 1fdcc9c257f1d9cfa29fd486707517169e811d56 Mon Sep 17 00:00:00 2001 From: bachvudinh Date: Wed, 7 Aug 2024 03:14:25 +0700 Subject: [PATCH 01/19] revert the code --- lm-evaluation-harness | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lm-evaluation-harness b/lm-evaluation-harness index 58b0b06..7d9e257 160000 --- a/lm-evaluation-harness +++ b/lm-evaluation-harness @@ -1 +1 @@ -Subproject commit 58b0b0605f8f6a8f0d71dde7901d67f9a1759d6e +Subproject commit 7d9e257015e83336cc691bda97fa75dc7e7969c2 From 3939a34f574b9a7e9b63e1bf24ebb07b713a61ac Mon Sep 17 00:00:00 2001 From: bachvudinh Date: Wed, 7 Aug 2024 03:31:12 +0700 Subject: [PATCH 02/19] refactor the code using pytest and correct yaml file --- .github/workflows/test-branch.yml | 32 +++++++++- .github/workflows/test-models.yml | 6 +- tests/conftest.py | 22 +++++++ tests/requirements.txt | 6 +- tests/test_case.py | 4 +- tests/unit_test.py | 102 ++++++++++++++++++++++++++++++ 6 files changed, 164 insertions(+), 8 deletions(-) create mode 100644 tests/conftest.py create mode 100644 tests/unit_test.py diff --git a/.github/workflows/test-branch.yml b/.github/workflows/test-branch.yml index a02e536..83521e1 100644 --- a/.github/workflows/test-branch.yml +++ b/.github/workflows/test-branch.yml @@ -9,7 +9,7 @@ on: model_id: description: 'Model ID on huggingface, for example: jan-hq/Jan-Llama3-0708' required: true - default: jan-hq/Jan-Llama3-0708 + default: homebrewltd/llama3-s-2024-07-08 type: string dataset_id: description: 'Dataset ID on huggingface, for example: jan-hq/instruction-speech-conversation-test' @@ -21,6 +21,11 @@ on: required: false default: "--mode audio --num_rows 5" type: string + run_benchmark: + description: 'Run benchmark test' + required: false + default: false + type: boolean jobs: run-test: @@ -28,6 +33,8 @@ jobs: steps: - name: Checkout uses: actions/checkout@v4 + with: + submodules: 'recursive' - name: Install dependencies working-directory: ./tests @@ -38,4 +45,25 @@ jobs: - name: Run tests working-directory: ./tests run: | - python3 test_case.py --model_dir ${{ github.event.inputs.model_id || 'jan-hq/Jan-Llama3-0708' }} --data_dir ${{ github.event.inputs.dataset_id || 'jan-hq/instruction-speech-conversation-test' }} ${{ github.event.inputs.extra_args || '--mode audio --num_rows 5' }} \ No newline at end of file + python3 test_case.py --model_dir ${{ github.event.inputs.model_id }} --data_dir ${{ github.event.inputs.dataset_id }} ${{ github.event.inputs.extra_args }} + + - name: Install benchmark dependencies + if: ${{ github.event.inputs.run_benchmark == 'true' }} + working-directory: ./lm-evaluation-harness + run: | + pip3 install -e . + pip3 install lm_eval[vllm] + + - name: Run benchmark + if: ${{ github.event.inputs.run_benchmark == 'true' }} + working-directory: ./lm-evaluation-harness + run: | + chmod +x ./run_benchmark.sh + ./run_benchmark.sh ${{ github.event.inputs.model_id }} + + - name: Upload benchmark results + if: ${{ github.event.inputs.run_benchmark == 'true' }} + uses: actions/upload-artifact@v2 + with: + name: benchmark-results + path: ./lm-evaluation-harness/benchmark_results/*.json \ No newline at end of file diff --git a/.github/workflows/test-models.yml b/.github/workflows/test-models.yml index a51d9d3..5011603 100644 --- a/.github/workflows/test-models.yml +++ b/.github/workflows/test-models.yml @@ -3,9 +3,9 @@ on: workflow_dispatch: inputs: model_id: - description: 'Model ID on huggingface, for example: jan-hq/Jan-Llama3-0708' + description: 'Model ID on huggingface, for example: homebrewltd/llama3-s-2024-07-08' required: true - default: jan-hq/Jan-Llama3-0708 + default: homebrewltd/llama3-s-2024-07-08 type: string dataset_id: description: 'Dataset ID on huggingface, for example: jan-hq/instruction-speech-conversation-test' @@ -29,6 +29,8 @@ jobs: steps: - name: Checkout uses: actions/checkout@v4 + with: + submodules: 'recursive' - name: Install dependencies working-directory: ./tests diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..83483fc --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,22 @@ +import pytest + +def pytest_addoption(parser): + parser.addoption("--model_dir", type=str, default="jan-hq/Jan-Llama3-0708", help="Hugging Face model link or local_dir") + parser.addoption("--max_length", type=int, default=1024, help="Maximum length of the output") + parser.addoption("--data_dir", type=str, required=True, help="Hugging Face model repository link or Data path") + parser.addoption("--cache_dir", type=str, default=".", help="Absolute path to save the model and dataset") + parser.addoption("--mode", type=str, default="audio", help="Mode of the model (audio or text)") + parser.addoption("--num_rows", type=int, default=5, help="Number of dataset rows to process") + parser.addoption("--output_file", type=str, default="output/", help="Output file path") + +@pytest.fixture(scope="session") +def custom_args(request): + return { + "model_dir": request.config.getoption("--model_dir"), + "max_length": request.config.getoption("--max_length"), + "data_dir": request.config.getoption("--data_dir"), + "cache_dir": request.config.getoption("--cache_dir"), + "mode": request.config.getoption("--mode"), + "num_rows": request.config.getoption("--num_rows"), + "output_file": request.config.getoption("--output_file"), + } \ No newline at end of file diff --git a/tests/requirements.txt b/tests/requirements.txt index e41cf80..cad61c3 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -1,7 +1,9 @@ -datasets==2.20.0 torch==2.3.0 +datasets==2.20.0 transformers vllm huggingface_hub==0.23.4 pandas==2.2.2 -nltk \ No newline at end of file +nltk +pytest +pytest-cov diff --git a/tests/test_case.py b/tests/test_case.py index 624b701..07ce601 100644 --- a/tests/test_case.py +++ b/tests/test_case.py @@ -114,8 +114,8 @@ def setUpClass(cls): else: print(f"Found {model_save_dir}. Skipping download.") # Model loading using vllm - cls.tokenizer = AutoTokenizer.from_pretrained(model_dir) - cls.llm = LLM(model_dir, tokenizer=model_dir, gpu_memory_utilization=0.3) + cls.tokenizer = AutoTokenizer.from_pretrained(model_save_dir) + cls.llm = LLM(model_save_dir, tokenizer=model_save_dir, gpu_memory_utilization=0.3) # Load dataset data_save_dir = os.path.join(args.cache_dir, args.data_dir) diff --git a/tests/unit_test.py b/tests/unit_test.py new file mode 100644 index 0000000..ffc1fa0 --- /dev/null +++ b/tests/unit_test.py @@ -0,0 +1,102 @@ +import pytest +from huggingface_hub import snapshot_download +from vllm import LLM, SamplingParams +from transformers import AutoTokenizer +from datasets import load_dataset +import pandas as pd +import numpy as np +import os +import time + +@pytest.fixture(scope="module") +def model_setup(custom_args): + args = custom_args + model_name = args.model_dir.split("/")[-1] + save_dir_output = f'{args.output_file}/{model_name}-{args.mode}-Result.csv' + if not os.path.exists(args.output_file): + os.makedirs(args.output_file) + + sampling_params = SamplingParams(temperature=0.0, max_tokens=args.max_length, skip_special_tokens=False) + + model_save_dir = os.path.join(args.cache_dir, args.model_dir) + if not os.path.exists(model_save_dir): + snapshot_download(args.model_dir, local_dir=model_save_dir, max_workers=64) + else: + print(f"Found {model_save_dir}. Skipping download.") + + tokenizer = AutoTokenizer.from_pretrained(model_save_dir) + llm = LLM(model_save_dir, tokenizer=model_save_dir, gpu_memory_utilization=0.3) + + data_save_dir = os.path.join(args.cache_dir, args.data_dir) + dataset = load_dataset(args.data_dir, split='train') + num_rows = min(args.num_rows, len(dataset)) + + return args, tokenizer, llm, dataset, num_rows, sampling_params, save_dir_output + +@pytest.fixture(scope="module") +def inference_results(model_setup): + args, tokenizer, llm, dataset, num_rows, sampling_params, _ = model_setup + results = [] + + def vllm_sound_inference(sample_id): + sound_messages = dataset[sample_id]['sound_convo'][0] + expected_output_messages = dataset[sample_id]['sound_convo'][1] + sound_input_str = tokenizer.apply_chat_template([sound_messages], tokenize=False, add_generation_prompt=True) + text_input_str = dataset[sample_id]['prompt'] + expected_output_str = tokenizer.apply_chat_template([expected_output_messages], tokenize=False) + + outputs = llm.generate(sound_input_str, sampling_params) + output_based_on_sound = outputs[0].outputs[0].text + output_token_ids = outputs[0].outputs[0].token_ids + + return text_input_str, output_based_on_sound, expected_output_str, output_token_ids + + def vllm_qna_inference(sample_id): + text_input_str = dataset[sample_id]['prompt'] + expected_answer_str = dataset[sample_id]['answer'] + question_str = tokenizer.apply_chat_template([text_input_str], tokenize=False, add_generation_prompt=True) + outputs = llm.generate(question_str, sampling_params) + output_based_on_question = outputs[0].outputs[0].text + output_token_ids = outputs[0].outputs[0].token_ids + + return text_input_str, output_based_on_question, expected_answer_str, output_token_ids + if args.mode == "audio": + for i in range(num_rows): + results.append(vllm_sound_inference(i)) + elif args.mode == "text": + for i in range(num_rows): + results.append(vllm_qna_inference(i)) + + df_results = pd.DataFrame(results, columns=['input', 'output', 'expected_output', 'output_token_ids']) + df_results.to_csv(save_dir_output, index=False, encoding='utf-8') + print(f"Successfully saved in {save_dir_output}") + + return results + +def test_model_output(inference_results): + for text_input_str, output_based_on_sound, expected_output_str, output_token_ids in inference_results: + assert len(output_based_on_sound) > 0, "Output should not be empty" + assert isinstance(output_based_on_sound, str), "Output should be a string" + assert all(token >= 0 for token in output_token_ids), "Output tokens should be valid" + +def test_special_tokens(model_setup, inference_results): + _, tokenizer, _, _, _, _, _ = model_setup + special_tokens = [tokenizer.bos_token, tokenizer.eos_token, tokenizer.pad_token] + for token in special_tokens: + if token: + encoded = tokenizer.encode(token) + assert encoded[0] != -100, f"Special token {token} should not be ignored" + +def test_no_nan_outputs(inference_results): + for _, output, _, _ in inference_results: + assert not any(np.isnan(float(word)) for word in output.split() if word.replace('.', '').isdigit()), "Output should not contain NaN values" + +def test_eos_token_generation(model_setup, inference_results): + _, tokenizer, _, _, _, _, _ = model_setup + eos_token_id = tokenizer.eos_token_id + for _, _, _, output_token_ids in inference_results: + assert eos_token_id in output_token_ids, "EOS token not found in the generated output" + assert output_token_ids[-1] == eos_token_id, "EOS token is not at the end of the sequence" + assert output_token_ids.count(eos_token_id) == 1, f"Expected 1 EOS token, but found {output_token_ids.count(eos_token_id)}" + +# Additional tests can be added here \ No newline at end of file From 637f4f63d350772cb0cb4c909923dcad4784bcab Mon Sep 17 00:00:00 2001 From: bachvudinh Date: Wed, 7 Aug 2024 03:32:03 +0700 Subject: [PATCH 03/19] add audio bench as submodule --- .gitmodules | 3 +++ AudioBench | 1 + 2 files changed, 4 insertions(+) create mode 160000 AudioBench diff --git a/.gitmodules b/.gitmodules index b6b8212..7fa598a 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule "lm-evaluation-harness"] path = lm-evaluation-harness url = git@github.com:homebrewltd/lm-evaluation-harness.git +[submodule "AudioBench"] + path = AudioBench + url = git@github.com:homebrewltd/AudioBench.git diff --git a/AudioBench b/AudioBench new file mode 160000 index 0000000..5e2b856 --- /dev/null +++ b/AudioBench @@ -0,0 +1 @@ +Subproject commit 5e2b8565b34ef522457ccfb0c99f60bbdd1a51ea From 3f6b56e60c1cbcaaa90485118644b5445a2e7ab3 Mon Sep 17 00:00:00 2001 From: bachvudinh Date: Wed, 7 Aug 2024 03:34:29 +0700 Subject: [PATCH 04/19] correct some bug --- .github/workflows/test-branch.yml | 2 +- .github/workflows/test-models.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-branch.yml b/.github/workflows/test-branch.yml index 83521e1..787740f 100644 --- a/.github/workflows/test-branch.yml +++ b/.github/workflows/test-branch.yml @@ -45,7 +45,7 @@ jobs: - name: Run tests working-directory: ./tests run: | - python3 test_case.py --model_dir ${{ github.event.inputs.model_id }} --data_dir ${{ github.event.inputs.dataset_id }} ${{ github.event.inputs.extra_args }} + python3 test_case.py --model_dir ${{ github.event.inputs.model_id || 'jan-hq/Jan-Llama3-0708' }} --data_dir ${{ github.event.inputs.dataset_id || 'jan-hq/instruction-speech-conversation-test' }} ${{ github.event.inputs.extra_args || '--mode audio --num_rows 5' }} - name: Install benchmark dependencies if: ${{ github.event.inputs.run_benchmark == 'true' }} diff --git a/.github/workflows/test-models.yml b/.github/workflows/test-models.yml index 5011603..4d12fd9 100644 --- a/.github/workflows/test-models.yml +++ b/.github/workflows/test-models.yml @@ -41,7 +41,7 @@ jobs: - name: Run tests working-directory: ./tests run: | - python3 test_case.py --model_dir ${{ github.event.inputs.model_id }} --data_dir ${{ github.event.inputs.dataset_id }} ${{ github.event.inputs.extra_args }} + python3 test_case.py --model_dir ${{ github.event.inputs.model_id || 'jan-hq/Jan-Llama3-0708' }} --data_dir ${{ github.event.inputs.dataset_id || 'jan-hq/instruction-speech-conversation-test' }} ${{ github.event.inputs.extra_args || '--mode audio --num_rows 5' }} - name: Install benchmark dependencies if: ${{ github.event.inputs.run_benchmark == 'true' }} From 9701f35c31e853cf95c9ba22aab7592562aa5dab Mon Sep 17 00:00:00 2001 From: bachvudinh Date: Wed, 7 Aug 2024 03:41:35 +0700 Subject: [PATCH 05/19] set gpu util = -1 --- tests/test_case.py | 2 +- tests/unit_test.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_case.py b/tests/test_case.py index 07ce601..f656d20 100644 --- a/tests/test_case.py +++ b/tests/test_case.py @@ -115,7 +115,7 @@ def setUpClass(cls): print(f"Found {model_save_dir}. Skipping download.") # Model loading using vllm cls.tokenizer = AutoTokenizer.from_pretrained(model_save_dir) - cls.llm = LLM(model_save_dir, tokenizer=model_save_dir, gpu_memory_utilization=0.3) + cls.llm = LLM(model_save_dir, tokenizer=model_save_dir, gpu_memory_utilization=-1) # Load dataset data_save_dir = os.path.join(args.cache_dir, args.data_dir) diff --git a/tests/unit_test.py b/tests/unit_test.py index ffc1fa0..c761dd4 100644 --- a/tests/unit_test.py +++ b/tests/unit_test.py @@ -25,7 +25,7 @@ def model_setup(custom_args): print(f"Found {model_save_dir}. Skipping download.") tokenizer = AutoTokenizer.from_pretrained(model_save_dir) - llm = LLM(model_save_dir, tokenizer=model_save_dir, gpu_memory_utilization=0.3) + llm = LLM(model_save_dir, tokenizer=model_save_dir, gpu_memory_utilization=-1) data_save_dir = os.path.join(args.cache_dir, args.data_dir) dataset = load_dataset(args.data_dir, split='train') From c1809535c5079c276afbe9341f5c3034a9ae5940 Mon Sep 17 00:00:00 2001 From: bachvudinh Date: Wed, 7 Aug 2024 03:48:48 +0700 Subject: [PATCH 06/19] debug --- tests/test_case.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_case.py b/tests/test_case.py index f656d20..dbcaca5 100644 --- a/tests/test_case.py +++ b/tests/test_case.py @@ -115,7 +115,7 @@ def setUpClass(cls): print(f"Found {model_save_dir}. Skipping download.") # Model loading using vllm cls.tokenizer = AutoTokenizer.from_pretrained(model_save_dir) - cls.llm = LLM(model_save_dir, tokenizer=model_save_dir, gpu_memory_utilization=-1) + cls.llm = LLM(model_save_dir, tokenizer=model_save_dir) # Load dataset data_save_dir = os.path.join(args.cache_dir, args.data_dir) From dcf249414609be613d8d6810a7376fb0f403bdb4 Mon Sep 17 00:00:00 2001 From: bachvudinh Date: Wed, 7 Aug 2024 04:06:56 +0700 Subject: [PATCH 07/19] bump submodule to new version --- lm-evaluation-harness | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lm-evaluation-harness b/lm-evaluation-harness index 7d9e257..86c151d 160000 --- a/lm-evaluation-harness +++ b/lm-evaluation-harness @@ -1 +1 @@ -Subproject commit 7d9e257015e83336cc691bda97fa75dc7e7969c2 +Subproject commit 86c151deb1a93420583cfee95f6bbc08327f201c From bea0a4ea262abbf1dab1bc4de25da5db81b291f0 Mon Sep 17 00:00:00 2001 From: bachvudinh Date: Wed, 7 Aug 2024 04:11:49 +0700 Subject: [PATCH 08/19] add some changes to workflow --- .github/workflows/test-models.yml | 1 + lm-evaluation-harness | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test-models.yml b/.github/workflows/test-models.yml index 4d12fd9..acb5acd 100644 --- a/.github/workflows/test-models.yml +++ b/.github/workflows/test-models.yml @@ -49,6 +49,7 @@ jobs: run: | pip3 install -e . pip3 install lm_eval[vllm] + echo "$HOME/.local/bin" >> $GITHUB_PATH - name: Run benchmark if: ${{ github.event.inputs.run_benchmark == 'true' }} diff --git a/lm-evaluation-harness b/lm-evaluation-harness index 86c151d..e631864 160000 --- a/lm-evaluation-harness +++ b/lm-evaluation-harness @@ -1 +1 @@ -Subproject commit 86c151deb1a93420583cfee95f6bbc08327f201c +Subproject commit e631864966b526de2b9a8bda0e4f3cf2170e74f7 From 2f77c408c46f5201bbee44a4f9273cee63c898be Mon Sep 17 00:00:00 2001 From: bachvudinh Date: Wed, 7 Aug 2024 04:22:19 +0700 Subject: [PATCH 09/19] debug --- .github/workflows/test-models.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-models.yml b/.github/workflows/test-models.yml index acb5acd..59a9095 100644 --- a/.github/workflows/test-models.yml +++ b/.github/workflows/test-models.yml @@ -20,7 +20,7 @@ on: run_benchmark: description: 'Run benchmark test' required: false - default: false + default: true type: boolean jobs: @@ -53,8 +53,8 @@ jobs: - name: Run benchmark if: ${{ github.event.inputs.run_benchmark == 'true' }} - working-directory: ./lm-evaluation-harness run: | + cd lm-evaluation-harness chmod +x ./run_benchmark.sh ./run_benchmark.sh ${{ github.event.inputs.model_id }} From a57916bffa916c4b83b1748c9c812cde50436537 Mon Sep 17 00:00:00 2001 From: bachvudinh Date: Wed, 7 Aug 2024 04:41:21 +0700 Subject: [PATCH 10/19] change workflow of dev branch --- .github/workflows/test-branch.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-branch.yml b/.github/workflows/test-branch.yml index 787740f..5aea07f 100644 --- a/.github/workflows/test-branch.yml +++ b/.github/workflows/test-branch.yml @@ -56,8 +56,8 @@ jobs: - name: Run benchmark if: ${{ github.event.inputs.run_benchmark == 'true' }} - working-directory: ./lm-evaluation-harness run: | + cd lm-evaluation-harness chmod +x ./run_benchmark.sh ./run_benchmark.sh ${{ github.event.inputs.model_id }} From ca481fe27651fdbc8115ec0136f6e4189cda7075 Mon Sep 17 00:00:00 2001 From: bachvudinh Date: Wed, 7 Aug 2024 05:06:20 +0700 Subject: [PATCH 11/19] correct small error --- .github/workflows/test-branch.yml | 5 +++-- .github/workflows/test-models.yml | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test-branch.yml b/.github/workflows/test-branch.yml index 5aea07f..359e49c 100644 --- a/.github/workflows/test-branch.yml +++ b/.github/workflows/test-branch.yml @@ -24,7 +24,7 @@ on: run_benchmark: description: 'Run benchmark test' required: false - default: false + default: true type: boolean jobs: @@ -49,10 +49,11 @@ jobs: - name: Install benchmark dependencies if: ${{ github.event.inputs.run_benchmark == 'true' }} - working-directory: ./lm-evaluation-harness run: | + cd lm-evaluation-harness pip3 install -e . pip3 install lm_eval[vllm] + echo "$HOME/.local/bin" >> $GITHUB_PATH - name: Run benchmark if: ${{ github.event.inputs.run_benchmark == 'true' }} diff --git a/.github/workflows/test-models.yml b/.github/workflows/test-models.yml index 59a9095..d41393e 100644 --- a/.github/workflows/test-models.yml +++ b/.github/workflows/test-models.yml @@ -45,8 +45,8 @@ jobs: - name: Install benchmark dependencies if: ${{ github.event.inputs.run_benchmark == 'true' }} - working-directory: ./lm-evaluation-harness run: | + cd lm-evaluation-harness pip3 install -e . pip3 install lm_eval[vllm] echo "$HOME/.local/bin" >> $GITHUB_PATH From 1e8c9fba9551443f6a3f926ec3b13aeb13092b65 Mon Sep 17 00:00:00 2001 From: bachvudinh Date: Wed, 7 Aug 2024 06:22:52 +0700 Subject: [PATCH 12/19] bump submodule version --- lm-evaluation-harness | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lm-evaluation-harness b/lm-evaluation-harness index e631864..5505969 160000 --- a/lm-evaluation-harness +++ b/lm-evaluation-harness @@ -1 +1 @@ -Subproject commit e631864966b526de2b9a8bda0e4f3cf2170e74f7 +Subproject commit 5505969071b6daf2ee368ed9497fa33f9c2972b9 From 6111b8c31a9f468d2a92da8f0fd6e1570d64dc99 Mon Sep 17 00:00:00 2001 From: bachvudinh Date: Sun, 11 Aug 2024 01:54:19 +0700 Subject: [PATCH 13/19] add new version of vllm to resolve conflict and add audio bench as option in workflow --- .github/workflows/test-branch.yml | 5 +++++ .github/workflows/test-models.yml | 5 +++++ tests/requirements.txt | 4 ++-- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-branch.yml b/.github/workflows/test-branch.yml index 359e49c..760ffaa 100644 --- a/.github/workflows/test-branch.yml +++ b/.github/workflows/test-branch.yml @@ -26,6 +26,11 @@ on: required: false default: true type: boolean + run audio_benchmark: + description: 'Run audio benchmark test' + required: false + default: true + type: boolean jobs: run-test: diff --git a/.github/workflows/test-models.yml b/.github/workflows/test-models.yml index d41393e..6fb5082 100644 --- a/.github/workflows/test-models.yml +++ b/.github/workflows/test-models.yml @@ -22,6 +22,11 @@ on: required: false default: true type: boolean + run audio_benchmark: + description: 'Run audio benchmark test' + required: false + default: true + type: boolean jobs: run-test-and-benchmark: diff --git a/tests/requirements.txt b/tests/requirements.txt index cad61c3..0ae6ed3 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -1,7 +1,7 @@ torch==2.3.0 datasets==2.20.0 -transformers -vllm +transformers>=4.43.0 +vllm==0.5.3.post1 huggingface_hub==0.23.4 pandas==2.2.2 nltk From 8bcc2ebfdee5b0c3d1e65ae40e849ddfbefe4ece Mon Sep 17 00:00:00 2001 From: bachvudinh Date: Sun, 11 Aug 2024 02:20:34 +0700 Subject: [PATCH 14/19] vllm required torch==2.3.1 --- tests/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/requirements.txt b/tests/requirements.txt index 0ae6ed3..caca143 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -1,4 +1,4 @@ -torch==2.3.0 +torch==2.3.1 datasets==2.20.0 transformers>=4.43.0 vllm==0.5.3.post1 From 8fcb3441afcbd714cb3174a1090c5a1fc6138ee8 Mon Sep 17 00:00:00 2001 From: bachvudinh Date: Sun, 11 Aug 2024 04:12:58 +0700 Subject: [PATCH 15/19] temp fix the bug by allowing 1 gpu bench for now --- lm-evaluation-harness | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lm-evaluation-harness b/lm-evaluation-harness index 5505969..bb5da65 160000 --- a/lm-evaluation-harness +++ b/lm-evaluation-harness @@ -1 +1 @@ -Subproject commit 5505969071b6daf2ee368ed9497fa33f9c2972b9 +Subproject commit bb5da65ba11ed26476b5f1d281823d2f643ec69b From 82874f31d9c2e8776a2b97204a2e6b0eae0dcbb7 Mon Sep 17 00:00:00 2001 From: bachvudinh Date: Sun, 11 Aug 2024 04:29:01 +0700 Subject: [PATCH 16/19] update vllm to 0.5.4 as experiemnent this will auto find libcudart.so --- tests/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/requirements.txt b/tests/requirements.txt index caca143..ccb1c16 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -1,7 +1,7 @@ torch==2.3.1 datasets==2.20.0 transformers>=4.43.0 -vllm==0.5.3.post1 +vllm==0.5.4 huggingface_hub==0.23.4 pandas==2.2.2 nltk From 3bd0d3f9ffdf730400097a318418297db6eb6a5b Mon Sep 17 00:00:00 2001 From: bachvudinh Date: Sun, 11 Aug 2024 04:31:40 +0700 Subject: [PATCH 17/19] debug --- tests/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/requirements.txt b/tests/requirements.txt index ccb1c16..d9e8afa 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -1,4 +1,4 @@ -torch==2.3.1 +torch==2.4.0 datasets==2.20.0 transformers>=4.43.0 vllm==0.5.4 From 8187f596aaaa303ea0798c6772488d6fcfee1f83 Mon Sep 17 00:00:00 2001 From: bachvudinh Date: Sun, 11 Aug 2024 05:08:58 +0700 Subject: [PATCH 18/19] add new base docker images and # Set the MKL_SERVICE_FORCE_INTEL environment variable --- .github/runners/Dockerfile | 5 ++++- lm-evaluation-harness | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/runners/Dockerfile b/.github/runners/Dockerfile index 6f5827d..fe31363 100644 --- a/.github/runners/Dockerfile +++ b/.github/runners/Dockerfile @@ -1,4 +1,7 @@ -FROM docker.io/pytorch/pytorch:2.3.0-cuda12.1-cudnn8-runtime +FROM docker.io/pytorch/pytorch:2.4.0-cuda12.1-cudnn9-runtime + +# Set the MKL_SERVICE_FORCE_INTEL environment variable +ENV MKL_SERVICE_FORCE_INTEL=1 RUN apt-get update \ && apt-get install -y --no-install-recommends \ diff --git a/lm-evaluation-harness b/lm-evaluation-harness index bb5da65..fcaa0b1 160000 --- a/lm-evaluation-harness +++ b/lm-evaluation-harness @@ -1 +1 @@ -Subproject commit bb5da65ba11ed26476b5f1d281823d2f643ec69b +Subproject commit fcaa0b1ea72f8ab855eed5ce8bfe07605f110f61 From c62b968dfae4e36d6847c306020e731629937656 Mon Sep 17 00:00:00 2001 From: bachvudinh Date: Sun, 11 Aug 2024 05:20:28 +0700 Subject: [PATCH 19/19] bump new version and debug a small bug --- .github/workflows/test-models.yml | 2 +- lm-evaluation-harness | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-models.yml b/.github/workflows/test-models.yml index 6fb5082..456c098 100644 --- a/.github/workflows/test-models.yml +++ b/.github/workflows/test-models.yml @@ -68,4 +68,4 @@ jobs: uses: actions/upload-artifact@v2 with: name: benchmark-results - path: ./lm-evaluation-harness/benchmark_results/*.json \ No newline at end of file + path: ./lm-evaluation-harness/benchmark_results/**/*.json \ No newline at end of file diff --git a/lm-evaluation-harness b/lm-evaluation-harness index fcaa0b1..1996e4e 160000 --- a/lm-evaluation-harness +++ b/lm-evaluation-harness @@ -1 +1 @@ -Subproject commit fcaa0b1ea72f8ab855eed5ce8bfe07605f110f61 +Subproject commit 1996e4e3e63adf2458ff0368e8f36a439ef3979f