From 9d84fc1142a73cf9924b8914bd728735cd1b6ef1 Mon Sep 17 00:00:00 2001 From: jaegeral Date: Wed, 4 Sep 2024 12:47:05 +0000 Subject: [PATCH 1/7] add a few unittests for timestamp conversion --- .../validate_no_datetime_timestamps.csv | 6 ++++ timesketch/lib/utils_test.py | 32 +++++++++++++++++++ 2 files changed, 38 insertions(+) create mode 100644 test_tools/test_events/validate_no_datetime_timestamps.csv diff --git a/test_tools/test_events/validate_no_datetime_timestamps.csv b/test_tools/test_events/validate_no_datetime_timestamps.csv new file mode 100644 index 0000000000..55e4f1c5fd --- /dev/null +++ b/test_tools/test_events/validate_no_datetime_timestamps.csv @@ -0,0 +1,6 @@ +"message","timestamp","datetime","timestamp_desc","data_type" +"No datetime given","1435789661000000","","Logging","No_datetime" +"Whitespace datetime","1437789661000000"," ","Logging","Whitespace_datetime" +"No Timestamp1","","2015-07-25 02:01:01+00:00","Logging","No timestamp1" +"No Timestamp2",,"2014-07-25 02:01:01+00:00","Logging","No timestamp2" +"Whitespace Timestamp"," ","2016-07-25 02:01:01+00:00","Logging","Whitespace timestamp" \ No newline at end of file diff --git a/timesketch/lib/utils_test.py b/timesketch/lib/utils_test.py index 8e7ca0e219..9aad0fc4bd 100644 --- a/timesketch/lib/utils_test.py +++ b/timesketch/lib/utils_test.py @@ -232,6 +232,38 @@ def test_timestamp_is_ISOformat(self): for output in expected_outputs: self.assertDictEqual(next(results), output) + def test_missing_datetime_in_CSV(self): + """Test for parsing a file with missing datetime field does attempt + to get it from timestamp or fail""" + results = iter( + read_and_validate_csv( + "test_tools/test_events/validate_no_datetime_timestamps.csv" + ) + ) + + n = 1 + for item in results: + n = n + 1 + if item["data_type"] == "No timestamp1": + self.assertIsNotNone(item["timestamp"]) + self.assertEqual(item["timestamp"], 1437789661000000) + self.assertIsNotNone(item["datetime"]) +
self.assertEqual(item["datetime"], "2015-07-25T02:01:01+00:00") + + elif item["data_type"] == "No timestamp2": + self.assertIsNotNone(item["timestamp"]) + self.assertEqual(item["timestamp"], 1406253661000000) + self.assertIsNotNone(item["datetime"]) + self.assertEqual(item["datetime"], "2014-07-25T02:01:01+00:00") + elif item["data_type"] == "Whitespace datetime": + self.assertIsNotNone(item["timestamp"]) + self.assertEqual(item["datetime"], "2016-07-25T02:01:01+00:00") + self.assertIsNotNone( + item["datetime"] + ) # TODO: This should not be a space + + self.assertGreaterEqual(n, 3) + def test_invalid_JSONL_file(self): """Test for JSONL with missing keys in the dictionary wrt headers mapping""" linedict = {"DT": "2011-11-11", "MSG": "this is a test"} From b8000244f748b1205bcd184216dd843a50d82867 Mon Sep 17 00:00:00 2001 From: jaegeral Date: Wed, 4 Sep 2024 13:59:26 +0000 Subject: [PATCH 2/7] adding some unit tests for parsing from CSV --- test_tools/test_events/invalid_datetime.csv | 5 ++ .../test_events/validate_time_precision.csv | 3 ++ timesketch/lib/utils_test.py | 48 +++++++++++++++++++ 3 files changed, 56 insertions(+) create mode 100644 test_tools/test_events/invalid_datetime.csv create mode 100644 test_tools/test_events/validate_time_precision.csv diff --git a/test_tools/test_events/invalid_datetime.csv b/test_tools/test_events/invalid_datetime.csv new file mode 100644 index 0000000000..1c202607f3 --- /dev/null +++ b/test_tools/test_events/invalid_datetime.csv @@ -0,0 +1,5 @@ +"message","timestamp","datetime","timestamp_desc","data_type" +"Missing timezone info","123456","2017-09-24 19:01:01","Write time","Missing_timezone_info" +"Wrong epoch","123456","2017-07-24T19:01:01","Write time","wrong_timestamp" +"Wrong epoch","9999999999999","2017-10-24 19:01:01","Write time","long_timestamp" +"Wrong epoch","88888888","1234 19:01:01","Write time","wrong_datetime_1" \ No newline at end of file diff --git a/test_tools/test_events/validate_time_precision.csv 
b/test_tools/test_events/validate_time_precision.csv new file mode 100644 index 0000000000..06986b799e --- /dev/null +++ b/test_tools/test_events/validate_time_precision.csv @@ -0,0 +1,3 @@ +"message","timestamp","datetime","timestamp_desc","data_type" +"total precision in datetime","123456789","2024-07-24T10:57:02.877297Z","Write time","timestamptest1" +"precision in timestamp","1331698658276340","2015-07-24T19:01:01+00:00","Write time","timestamptest2" \ No newline at end of file diff --git a/timesketch/lib/utils_test.py b/timesketch/lib/utils_test.py index 9aad0fc4bd..992f5d74ff 100644 --- a/timesketch/lib/utils_test.py +++ b/timesketch/lib/utils_test.py @@ -264,6 +264,54 @@ def test_missing_datetime_in_CSV(self): self.assertGreaterEqual(n, 3) + def test_time_datetime_valueerror(self): + """Test for parsing a file with time precision + + The file is currently parsed as: + {'message': 'Missing timezone info', 'timestamp': 123456, + 'datetime': '2017-09-24T19:01:01', + 'timestamp_desc': 'Write time', + 'data_type': 'Missing_timezone_info'} + {'message': 'Wrong epoch', 'timestamp': 123456, + 'datetime': '2017-07-24T19:01:01', + 'timestamp_desc': 'Write time', + 'data_type': 'wrong_timestamp'} + {'message': 'Wrong epoch', 'timestamp': 9999999999999, + 'datetime': '2017-10-24T19:01:01', + 'timestamp_desc': 'Write time', + 'data_type': 'long_timestamp'} + + """ + + results = iter( + read_and_validate_csv("test_tools/test_events/invalid_datetime.csv") + ) + results_list = [] + for item in results: + results_list.append(item) + self.assertIsNotNone(item) + # check that certain values are not present in results_list + self.assertNotIn( + "wrong_datetime_1", + str(results_list), + "Parsed line is in results but should be skipped", + ) + self.assertIn("long_timestamp", str(results_list)) + + def test_time_precision_in_csv(self): + """Test for parsing a file with time precision""" + results = iter( + 
read_and_validate_csv("test_tools/test_events/validate_time_precision.csv") + ) + results_list = [] + for item in results: + results_list.append(item) + self.assertIsNotNone(item["timestamp"]) + + self.assertIn("timestamptest1", str(results_list)) + self.assertIn("2024-07-24T10:57:02.877297+00:00", str(results_list)) + self.assertIn("timestamptest2", str(results_list)) + def test_invalid_JSONL_file(self): """Test for JSONL with missing keys in the dictionary wrt headers mapping""" linedict = {"DT": "2011-11-11", "MSG": "this is a test"} From cb1c07aef047801f1c5496137b9cb9052abb1831 Mon Sep 17 00:00:00 2001 From: jaegeral Date: Tue, 10 Sep 2024 14:53:40 +0000 Subject: [PATCH 3/7] adding unit tests for fixing dataframes datetime / timestamp --- .../timesketch_import_client/importer_test.py | 63 +++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/importer_client/python/timesketch_import_client/importer_test.py b/importer_client/python/timesketch_import_client/importer_test.py index 528e4edbfa..adc1aa5900 100644 --- a/importer_client/python/timesketch_import_client/importer_test.py +++ b/importer_client/python/timesketch_import_client/importer_test.py @@ -110,6 +110,8 @@ def setUp(self): self.frame = pandas.DataFrame(self.lines) + self._importer = importer.ImportStreamer() + def test_adding_data_frames(self): """Test adding a data frame to the importer.""" with MockStreamer() as streamer: @@ -175,6 +177,67 @@ def test_adding_json(self): streamer.flush() self._run_all_tests(streamer.columns, streamer.lines) + def test_fix_data_frame(self): + """Test fixing a data frame. + create a pandas dataframe with timestamp, datetime, message and data_type + columns and check some basics that the method is actually working. 
+ """ + + data_frame = pandas.DataFrame( + { + "timestamp": ["1435789661000000"], + "stuff": ["foobar"], + "correct": [True], + "random_number": [11332], + "vital_stats": ["ille"], + "datetime": ["2019-01-03T02:39:42"], + } + ) + fixed_frame = self._importer._fix_data_frame(data_frame) + self.assertIsNotNone(fixed_frame) + + self.assertIs("ille" in fixed_frame["vital_stats"].values, True) + print(fixed_frame["datetime"].values) + self.assertIs( + "2019-01-03T02:39:42+0000" in fixed_frame["datetime"].values, True + ) + + def test_fix_data_frame_precision_datetime(self): + """Test fixing a data frame with a datetime that has microsecond precision.""" + + data_frame = pandas.DataFrame( + { + "timestamp": ["1456"], + "datetime": ["2024-07-24T10:57:02.877297Z"], + } + ) + fixed_frame = self._importer._fix_data_frame(data_frame) + self.assertIsNotNone(fixed_frame) + + print(fixed_frame["datetime"].values) + self.assertIs( + "2024-07-24T10:57:02+0000" in fixed_frame["datetime"].values, True + ) + + def test_fix_data_frame_precision_timestamp(self): + """Test fixing a data frame with a timestamp that has microsecond precision.""" + + data_frame = pandas.DataFrame( + { + "timestamp": ["1331698658276340"], + "datetime": ["1985-01-21T10:57:02.25Z"], + } + ) + fixed_frame = self._importer._fix_data_frame(data_frame) + self.assertIsNotNone(fixed_frame) + + print(fixed_frame["datetime"].values) + print(fixed_frame["timestamp"].values) + self.assertIs( + "1985-01-21T10:57:02+0000" in fixed_frame["datetime"].values, True + ) + self.assertIs("1331698658276340" in fixed_frame["timestamp"].values, True) + def _run_all_tests(self, columns, lines): """Run all tests on the result set of a streamer.""" # The first line is the column line.
From 271068bdfe19c2f3b4ce3622c6a1602ef6ce8285 Mon Sep 17 00:00:00 2001 From: jaegeral Date: Tue, 10 Sep 2024 15:18:40 +0000 Subject: [PATCH 4/7] fix lint --- .../python/timesketch_import_client/importer_test.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/importer_client/python/timesketch_import_client/importer_test.py b/importer_client/python/timesketch_import_client/importer_test.py index adc1aa5900..38ce0a91a7 100644 --- a/importer_client/python/timesketch_import_client/importer_test.py +++ b/importer_client/python/timesketch_import_client/importer_test.py @@ -193,7 +193,9 @@ def test_fix_data_frame(self): "datetime": ["2019-01-03T02:39:42"], } ) - fixed_frame = self._importer._fix_data_frame(data_frame) + fixed_frame = self._importer._fix_data_frame( + data_frame + ) # pylint: disable=protected-access self.assertIsNotNone(fixed_frame) self.assertIs("ille" in fixed_frame["vital_stats"].values, True) @@ -211,7 +213,9 @@ def test_fix_data_frame_precision_datetime(self): "datetime": ["2024-07-24T10:57:02.877297Z"], } ) - fixed_frame = self._importer._fix_data_frame(data_frame) + fixed_frame = self._importer._fix_data_frame( + data_frame + ) # pylint: disable=protected-access self.assertIsNotNone(fixed_frame) print(fixed_frame["datetime"].values) @@ -228,7 +232,9 @@ def test_fix_data_frame_precision_timestamp(self): "datetime": ["1985-01-21T10:57:02.25Z"], } ) - fixed_frame = self._importer._fix_data_frame(data_frame) + fixed_frame = self._importer._fix_data_frame( + data_frame + ) # pylint: disable=protected-access self.assertIsNotNone(fixed_frame) print(fixed_frame["datetime"].values) From b2093a8535dd3869654f0a98c5e09cde0ef8d984 Mon Sep 17 00:00:00 2001 From: jaegeral Date: Tue, 10 Sep 2024 15:23:21 +0000 Subject: [PATCH 5/7] pylint2 --- .../timesketch_import_client/importer_test.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/importer_client/python/timesketch_import_client/importer_test.py 
b/importer_client/python/timesketch_import_client/importer_test.py index 38ce0a91a7..e62a70060c 100644 --- a/importer_client/python/timesketch_import_client/importer_test.py +++ b/importer_client/python/timesketch_import_client/importer_test.py @@ -177,6 +177,7 @@ def test_adding_json(self): streamer.flush() self._run_all_tests(streamer.columns, streamer.lines) + # pylint: disable=protected-access def test_fix_data_frame(self): """Test fixing a data frame. create a pandas dataframe with timestamp, datetime, message and data_type @@ -193,9 +194,7 @@ def test_fix_data_frame(self): "datetime": ["2019-01-03T02:39:42"], } ) - fixed_frame = self._importer._fix_data_frame( - data_frame - ) # pylint: disable=protected-access + fixed_frame = self._importer._fix_data_frame(data_frame) self.assertIsNotNone(fixed_frame) self.assertIs("ille" in fixed_frame["vital_stats"].values, True) @@ -213,9 +212,7 @@ def test_fix_data_frame_precision_datetime(self): "datetime": ["2024-07-24T10:57:02.877297Z"], } ) - fixed_frame = self._importer._fix_data_frame( - data_frame - ) # pylint: disable=protected-access + fixed_frame = self._importer._fix_data_frame(data_frame) self.assertIsNotNone(fixed_frame) print(fixed_frame["datetime"].values) @@ -232,9 +229,7 @@ def test_fix_data_frame_precision_timestamp(self): "datetime": ["1985-01-21T10:57:02.25Z"], } ) - fixed_frame = self._importer._fix_data_frame( - data_frame - ) # pylint: disable=protected-access + fixed_frame = self._importer._fix_data_frame(data_frame) self.assertIsNotNone(fixed_frame) print(fixed_frame["datetime"].values) @@ -244,6 +239,7 @@ def test_fix_data_frame_precision_timestamp(self): ) self.assertIs("1331698658276340" in fixed_frame["timestamp"].values, True) + # pylint: enable=protected-access def _run_all_tests(self, columns, lines): """Run all tests on the result set of a streamer.""" # The first line is the column line. 
From 5bc3c4137e45ec109811b417bab16a58734e72d5 Mon Sep 17 00:00:00 2001 From: jaegeral Date: Wed, 11 Sep 2024 09:35:14 +0000 Subject: [PATCH 6/7] remove two prints --- .../python/timesketch_import_client/importer_test.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/importer_client/python/timesketch_import_client/importer_test.py b/importer_client/python/timesketch_import_client/importer_test.py index e62a70060c..2776d7682a 100644 --- a/importer_client/python/timesketch_import_client/importer_test.py +++ b/importer_client/python/timesketch_import_client/importer_test.py @@ -232,8 +232,6 @@ def test_fix_data_frame_precision_timestamp(self): fixed_frame = self._importer._fix_data_frame(data_frame) self.assertIsNotNone(fixed_frame) - print(fixed_frame["datetime"].values) - print(fixed_frame["timestamp"].values) self.assertIs( "1985-01-21T10:57:02+0000" in fixed_frame["datetime"].values, True ) From dc78fd0cd9509e716b583272713dd1f749c59e3f Mon Sep 17 00:00:00 2001 From: jaegeral Date: Wed, 11 Sep 2024 09:36:55 +0000 Subject: [PATCH 7/7] remove a todo that was left from developing --- timesketch/lib/utils_test.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/timesketch/lib/utils_test.py b/timesketch/lib/utils_test.py index 992f5d74ff..1abdaa07bc 100644 --- a/timesketch/lib/utils_test.py +++ b/timesketch/lib/utils_test.py @@ -258,9 +258,7 @@ def test_missing_datetime_in_CSV(self): elif item["data_type"] == "Whitespace datetime": self.assertIsNotNone(item["timestamp"]) self.assertEqual(item["datetime"], "2016-07-25T02:01:01+00:00") - self.assertIsNotNone( - item["datetime"] - ) # TODO: This should not be a space + self.assertIsNotNone(item["datetime"]) self.assertGreaterEqual(n, 3)