From 87d69afc2d5d34a01ab9e7444a2b1d510aca54bc Mon Sep 17 00:00:00 2001 From: bachvudinh Date: Thu, 18 Jul 2024 13:16:57 +0700 Subject: [PATCH 1/4] refactor current test as unit-test --- .github/workflows/test-flow.yml | 1 + test_flows.py | 20 ----- tests/test_flows.py | 152 ++++++++++++++++++++++++++++++++ 3 files changed, 153 insertions(+), 20 deletions(-) delete mode 100644 test_flows.py create mode 100644 tests/test_flows.py diff --git a/.github/workflows/test-flow.yml b/.github/workflows/test-flow.yml index f2ff92e..8498991 100644 --- a/.github/workflows/test-flow.yml +++ b/.github/workflows/test-flow.yml @@ -20,6 +20,7 @@ jobs: pip install -e . - name: Run test_flow.py + working-directory: ./tests run: python test_flows.py env: S3_ACCESS_KEY: ${{ secrets.MINIO_ACCESS_KEY_ID }} diff --git a/test_flows.py b/test_flows.py deleted file mode 100644 index aeea043..0000000 --- a/test_flows.py +++ /dev/null @@ -1,20 +0,0 @@ -from s3helper import S3Helper,S3HelperAutoConfig,S3HelperAutoTokenizer,S3HelperAutoModelForCausalLM, s3_load_dataset -import os -import logging - -# os.environ['S3_ACCESS_KEY'] = 'minioadmin' -# os.environ['S3_SECRET_KEY'] = 'minioadmin' -# os.environ['S3_ENDPOINT_URL'] = 'http://172.17.0.2:9000' -S3Helper() - -# # Example usage -model_name = "jan-hq-test/tokenizer-tinyllama" -# model = S3HelperAutoModelForCausalLM.from_pretrained(model_name) -tokenizer = S3HelperAutoTokenizer.from_pretrained(model_name) -logging.info(f"Tokenizer Loading successful: {tokenizer}") -# print(tokenizer) -# config = S3HelperAutoConfig.from_pretrained(model_name) -# Make sure S3Helper is initialized and environment variables are set -# Load a dataset from S3 bucket -dataset = s3_load_dataset("jan-hq-test/test-dataset",file_format='parquet', split='train') -logging.info(f"Dataset Loading successful") \ No newline at end of file diff --git a/tests/test_flows.py b/tests/test_flows.py new file mode 100644 index 0000000..6b3242e --- /dev/null +++ b/tests/test_flows.py @@ -0,0 +1,152 @@ +import unittest +import os +from unittest.mock import patch, MagicMock +from io import StringIO +import time +from s3helper import ( + S3Helper, + S3HelperAutoConfig, + S3HelperAutoTokenizer, + S3HelperAutoModelForCausalLM, + s3_load_dataset, +) +import sys + +class CustomTestResult(unittest.TestResult): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.successes = [] + + def addSuccess(self, test): + super().addSuccess(test) + self.successes.append(test) + +class CustomTestRunner(unittest.TextTestRunner): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.stream = StringIO() + self.results = [] + + def run(self, test): + result = CustomTestResult() + start_time = time.time() + test(result) + time_taken = time.time() - start_time + self.results.append((result, time_taken)) + return result + + def print_results(self): + print("\n=== Test Results ===") + total_tests = 0 + total_successes = 0 + total_failures = 0 + total_errors = 0 + total_time = 0 + + for result, time_taken in self.results: + total_tests += result.testsRun + total_successes += len(result.successes) + total_failures += len(result.failures) + total_errors += len(result.errors) + total_time += time_taken + + print(f"Ran {total_tests} tests in {total_time:.3f} seconds") + print(f"Successes: {total_successes}") + print(f"Failures: {total_failures}") + print(f"Errors: {total_errors}") + + print("\nDetailed Results:") + for result, time_taken in self.results: + # todo: add time taken for each test + for test in result.successes: + print(f"PASS: {test._testMethodName}") + for test, _ in result.failures: + print(f"FAIL: {test._testMethodName}") + for test, _ in result.errors: + test_name = getattr(test, '_testMethodName', str(test)) + print(f"ERROR: {test_name}") + + if total_failures > 0 or total_errors > 0: + print("\nFailure and Error Details:") + for result, _ in self.results: + for test, traceback in result.failures: + print(f"\nFAILURE: {test._testMethodName}") + print(traceback) + for test, traceback in result.errors: + test_name = getattr(test, '_testMethodName', str(test)) + print(f"\nERROR: {test_name}") + print(traceback) + else: + print("\nAll tests passed successfully!") + +def test_name(name): + def decorator(func): + func.__name__ = name + return func + return decorator + +class TestS3Helper(unittest.TestCase): + @classmethod + def setUpClass(cls): + # Set up any necessary test environment + os.environ['S3_ACCESS_KEY'] = 'test_access_key' + os.environ['S3_SECRET_KEY'] = 'test_secret_key' + os.environ['S3_ENDPOINT_URL'] = 'http://test.endpoint:9000' + + @test_name("S3Helper Initialization") + def test_s3helper_initialization(self): + with patch('s3helper.S3Helper') as mock_s3helper: + S3Helper() + mock_s3helper.assert_called_once() + + @test_name("AutoTokenizer from_pretrained") + def test_auto_tokenizer_from_pretrained(self): + with patch('s3helper.S3HelperAutoTokenizer.from_pretrained') as mock_from_pretrained: + model_name = "jan-hq-test/tokenizer-tinyllama" + mock_tokenizer = MagicMock() + mock_from_pretrained.return_value = mock_tokenizer + + tokenizer = S3HelperAutoTokenizer.from_pretrained(model_name) + + mock_from_pretrained.assert_called_once_with(model_name) + self.assertEqual(tokenizer, mock_tokenizer) + + @test_name("s3_load_dataset") + def test_s3_load_dataset(self): + with patch('s3helper.s3_load_dataset') as mock_load_dataset: + mock_dataset = MagicMock() + mock_load_dataset.return_value = mock_dataset + + dataset = s3_load_dataset("jan-hq-test/test-dataset", file_format='parquet', split='train') + + mock_load_dataset.assert_called_once_with("jan-hq-test/test-dataset", file_format='parquet', split='train') + self.assertEqual(dataset, mock_dataset) + + @test_name("AutoModelForCausalLM from_pretrained") + def test_auto_model_for_causal_lm_from_pretrained(self): + with patch('s3helper.S3HelperAutoModelForCausalLM.from_pretrained') as mock_from_pretrained: + model_name = "jan-hq-test/tokenizer-tinyllama" + mock_model = MagicMock() + mock_from_pretrained.return_value = mock_model + + model = S3HelperAutoModelForCausalLM.from_pretrained(model_name) + + mock_from_pretrained.assert_called_once_with(model_name) + self.assertEqual(model, mock_model) + + @test_name("AutoConfig from_pretrained") + def test_auto_config_from_pretrained(self): + with patch('s3helper.S3HelperAutoConfig.from_pretrained') as mock_from_pretrained: + model_name = "jan-hq-test/tokenizer-tinyllama" + mock_config = MagicMock() + mock_from_pretrained.return_value = mock_config + + config = S3HelperAutoConfig.from_pretrained(model_name) + + mock_from_pretrained.assert_called_once_with(model_name) + self.assertEqual(config, mock_config) + +if __name__ == "__main__": + runner = CustomTestRunner(stream=sys.stdout, verbosity=2) + unittest.main(argv=['first-arg-is-ignored'], exit=False, testRunner=runner) + runner.print_results() \ No newline at end of file From 536cd580cfa38f127957e821c7e74097774e05e6 Mon Sep 17 00:00:00 2001 From: bachvudinh Date: Thu, 18 Jul 2024 13:49:37 +0700 Subject: [PATCH 2/4] debug --- tests/test_flows.py | 35 ++++++++++++++++------------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/tests/test_flows.py b/tests/test_flows.py index 6b3242e..5200c28 100644 --- a/tests/test_flows.py +++ b/tests/test_flows.py @@ -89,61 +89,58 @@ class TestS3Helper(unittest.TestCase): @classmethod def setUpClass(cls): # Set up any necessary test environment - os.environ['S3_ACCESS_KEY'] = 'test_access_key' - os.environ['S3_SECRET_KEY'] = 'test_secret_key' - os.environ['S3_ENDPOINT_URL'] = 'http://test.endpoint:9000' + cls.model_name_or_path = "jan-hq-test/tinyllama-v1.1" + cls.dataset_name_or_path = "jan-hq-test/test-dataset" + - @test_name("S3Helper Initialization") + @test_name("Connect to Minio") def test_s3helper_initialization(self): with patch('s3helper.S3Helper') as mock_s3helper: S3Helper() mock_s3helper.assert_called_once() - @test_name("AutoTokenizer from_pretrained") + @test_name("Load tokenizer from Minio") def test_auto_tokenizer_from_pretrained(self): with patch('s3helper.S3HelperAutoTokenizer.from_pretrained') as mock_from_pretrained: - model_name = "jan-hq-test/tokenizer-tinyllama" mock_tokenizer = MagicMock() mock_from_pretrained.return_value = mock_tokenizer - tokenizer = S3HelperAutoTokenizer.from_pretrained(model_name) + tokenizer = S3HelperAutoTokenizer.from_pretrained(self.model_name_or_path) - mock_from_pretrained.assert_called_once_with(model_name) + mock_from_pretrained.assert_called_once_with(self.model_name_or_path) self.assertEqual(tokenizer, mock_tokenizer) - @test_name("s3_load_dataset") + @test_name("Load dataset from Minio") def test_s3_load_dataset(self): with patch('s3helper.s3_load_dataset') as mock_load_dataset: mock_dataset = MagicMock() mock_load_dataset.return_value = mock_dataset - dataset = s3_load_dataset("jan-hq-test/test-dataset", file_format='parquet', split='train') + dataset = s3_load_dataset(self.dataset_name_or_path, file_format='parquet', split='train') - mock_load_dataset.assert_called_once_with("jan-hq-test/test-dataset", file_format='parquet', split='train') + mock_load_dataset.assert_called_once_with(self.dataset_name_or_path, file_format='parquet', split='train') self.assertEqual(dataset, mock_dataset) - @test_name("AutoModelForCausalLM from_pretrained") + @test_name("Load Causal LM model from Minio") def test_auto_model_for_causal_lm_from_pretrained(self): with patch('s3helper.S3HelperAutoModelForCausalLM.from_pretrained') as mock_from_pretrained: - model_name = "jan-hq-test/tokenizer-tinyllama" mock_model = MagicMock() mock_from_pretrained.return_value = mock_model - model = S3HelperAutoModelForCausalLM.from_pretrained(model_name) + model = S3HelperAutoModelForCausalLM.from_pretrained(self.model_name_or_path) - mock_from_pretrained.assert_called_once_with(model_name) + mock_from_pretrained.assert_called_once_with(self.model_name_or_path) self.assertEqual(model, mock_model) - @test_name("AutoConfig from_pretrained") + @test_name("Load Model Config from Minio") def test_auto_config_from_pretrained(self): with patch('s3helper.S3HelperAutoConfig.from_pretrained') as mock_from_pretrained: - model_name = "jan-hq-test/tokenizer-tinyllama" mock_config = MagicMock() mock_from_pretrained.return_value = mock_config - config = S3HelperAutoConfig.from_pretrained(model_name) + config = S3HelperAutoConfig.from_pretrained(self.model_name_or_path) - mock_from_pretrained.assert_called_once_with(model_name) + mock_from_pretrained.assert_called_once_with(self.model_name_or_path) self.assertEqual(config, mock_config) if __name__ == "__main__": From ca491878c669b9a7b721ae6b17ff5ddb70f047da Mon Sep 17 00:00:00 2001 From: bachvudinh Date: Thu, 18 Jul 2024 14:54:56 +0700 Subject: [PATCH 3/4] correct bucket name --- tests/test_flows.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_flows.py b/tests/test_flows.py index 5200c28..a0bd474 100644 --- a/tests/test_flows.py +++ b/tests/test_flows.py @@ -89,7 +89,7 @@ class TestS3Helper(unittest.TestCase): @classmethod def setUpClass(cls): # Set up any necessary test environment - cls.model_name_or_path = "jan-hq-test/tinyllama-v1.1" + cls.model_name_or_path = "jan-hq-test/tokenizer-tinyllama" cls.dataset_name_or_path = "jan-hq-test/test-dataset" From 8d2b13f1e4a759fc49210afc058f8d246c8cd743 Mon Sep 17 00:00:00 2001 From: bachvudinh Date: Thu, 18 Jul 2024 15:11:59 +0700 Subject: [PATCH 4/4] debug and add test singleton behaviour --- tests/test_flows.py | 125 ++++++++++++++++++++++++-------------------- 1 file changed, 67 insertions(+), 58 deletions(-) diff --git a/tests/test_flows.py b/tests/test_flows.py index a0bd474..491f4bb 100644 --- a/tests/test_flows.py +++ b/tests/test_flows.py @@ -1,16 +1,9 @@ import unittest -import os from unittest.mock import patch, MagicMock -from io import StringIO +import logging import time -from s3helper import ( - S3Helper, - S3HelperAutoConfig, - S3HelperAutoTokenizer, - S3HelperAutoModelForCausalLM, - s3_load_dataset, -) -import sys +from io import StringIO +from s3helper import S3Helper, S3HelperAutoConfig, S3HelperAutoTokenizer, S3HelperAutoModelForCausalLM, s3_load_dataset class CustomTestResult(unittest.TestResult): def __init__(self, *args, **kwargs): @@ -57,7 +50,6 @@ def print_results(self): print("\nDetailed Results:") for result, time_taken in self.results: - # todo: add time taken for each test for test in result.successes: print(f"PASS: {test._testMethodName}") for test, _ in result.failures: @@ -88,62 +80,79 @@ def decorator(func): class TestS3Helper(unittest.TestCase): @classmethod def setUpClass(cls): - # Set up any necessary test environment - cls.model_name_or_path = "jan-hq-test/tokenizer-tinyllama" - cls.dataset_name_or_path = "jan-hq-test/test-dataset" - + cls.model_name = "jan-hq-test/tokenizer-tinyllama" + cls.dataset_name = "jan-hq-test/test-dataset" + + @test_name("S3Helper Singleton Test") + def test_s3helper_singleton(self): + instance1 = S3Helper() + instance2 = S3Helper() + self.assertIs(instance1, instance2, "S3Helper should return the same instance") - @test_name("Connect to Minio") + @test_name("S3Helper Initialization Test") def test_s3helper_initialization(self): - with patch('s3helper.S3Helper') as mock_s3helper: + try: S3Helper() - mock_s3helper.assert_called_once() - - @test_name("Load tokenizer from Minio") - def test_auto_tokenizer_from_pretrained(self): - with patch('s3helper.S3HelperAutoTokenizer.from_pretrained') as mock_from_pretrained: - mock_tokenizer = MagicMock() - mock_from_pretrained.return_value = mock_tokenizer - - tokenizer = S3HelperAutoTokenizer.from_pretrained(self.model_name_or_path) - - mock_from_pretrained.assert_called_once_with(self.model_name_or_path) - self.assertEqual(tokenizer, mock_tokenizer) + except Exception as e: + self.fail(f"S3Helper initialization raised an exception: {e}") - @test_name("Load dataset from Minio") - def test_s3_load_dataset(self): - with patch('s3helper.s3_load_dataset') as mock_load_dataset: - mock_dataset = MagicMock() - mock_load_dataset.return_value = mock_dataset + @test_name("Tokenizer Loading Test") + @patch('s3helper.S3HelperAutoTokenizer.from_pretrained') + def test_tokenizer_loading(self, mock_from_pretrained): + mock_tokenizer = MagicMock() + mock_from_pretrained.return_value = mock_tokenizer - dataset = s3_load_dataset(self.dataset_name_or_path, file_format='parquet', split='train') - - mock_load_dataset.assert_called_once_with(self.dataset_name_or_path, file_format='parquet', split='train') - self.assertEqual(dataset, mock_dataset) - - @test_name("Load Causal LM model from Minio") - def test_auto_model_for_causal_lm_from_pretrained(self): - with patch('s3helper.S3HelperAutoModelForCausalLM.from_pretrained') as mock_from_pretrained: - mock_model = MagicMock() - mock_from_pretrained.return_value = mock_model + tokenizer = S3HelperAutoTokenizer.from_pretrained(self.model_name) + + mock_from_pretrained.assert_called_once_with(self.model_name) + self.assertIsNotNone(tokenizer) + self.assertEqual(tokenizer, mock_tokenizer) - model = S3HelperAutoModelForCausalLM.from_pretrained(self.model_name_or_path) + @test_name("Dataset Loading Test") + @patch('s3helper.s3_load_dataset') + def test_dataset_loading(self, mock_s3_load_dataset): + mock_dataset = MagicMock() + mock_s3_load_dataset.return_value = mock_dataset - mock_from_pretrained.assert_called_once_with(self.model_name_or_path) - self.assertEqual(model, mock_model) + dataset = s3_load_dataset(self.dataset_name, file_format='parquet', split='train') + + mock_s3_load_dataset.assert_called_once_with(self.dataset_name, file_format='parquet', split='train') + self.assertIsNotNone(dataset) + self.assertEqual(dataset, mock_dataset) - @test_name("Load Model Config from Minio") - def test_auto_config_from_pretrained(self): - with patch('s3helper.S3HelperAutoConfig.from_pretrained') as mock_from_pretrained: - mock_config = MagicMock() - mock_from_pretrained.return_value = mock_config + @test_name("Config Loading Test") + @patch('s3helper.S3HelperAutoConfig.from_pretrained') + def test_config_loading(self, mock_from_pretrained): + mock_config = MagicMock() + mock_from_pretrained.return_value = mock_config - config = S3HelperAutoConfig.from_pretrained(self.model_name_or_path) + config = S3HelperAutoConfig.from_pretrained(self.model_name) + + mock_from_pretrained.assert_called_once_with(self.model_name) + self.assertIsNotNone(config) + self.assertEqual(config, mock_config) - mock_from_pretrained.assert_called_once_with(self.model_name_or_path) - self.assertEqual(config, mock_config) + @test_name("Model Loading Test") + @patch('s3helper.S3HelperAutoModelForCausalLM.from_pretrained') + def test_model_loading(self, mock_from_pretrained): + mock_model = MagicMock() + mock_from_pretrained.return_value = mock_model -if __name__ == "__main__": - runner = CustomTestRunner(stream=sys.stdout, verbosity=2) - unittest.main(argv=['first-arg-is-ignored'], exit=False, testRunner=runner) + model = S3HelperAutoModelForCausalLM.from_pretrained(self.model_name) + + mock_from_pretrained.assert_called_once_with(self.model_name) + self.assertIsNotNone(model) + self.assertEqual(model, mock_model) + + @test_name("S3Helper AWS Credentials Test") + @patch.object(S3Helper, '_S3Helper__instance', None) # Reset singleton for this test + @patch('boto3.client') + def test_s3helper_aws_credentials(self, mock_boto3_client): + S3Helper() + mock_boto3_client.assert_called_once_with('s3') + +if __name__ == '__main__': + runner = CustomTestRunner() + test_suite = unittest.TestLoader().loadTestsFromTestCase(TestS3Helper) + result = runner.run(test_suite) runner.print_results() \ No newline at end of file