Skip to content

Commit

Permalink
[tests] Add jsonl e2e tests (#2976)
Browse files Browse the repository at this point in the history
* fix small doc typo

* add jsonl e2e tests

* pylint
  • Loading branch information
jaegeral authored Nov 8, 2023
1 parent 9d51409 commit d492c87
Show file tree
Hide file tree
Showing 3 changed files with 123 additions and 6 deletions.
2 changes: 1 addition & 1 deletion docs/developers/testing.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ breakpoint()
And then within the docker container execute

```shell
! python3 -m pytest /usr/local/src/timesketchtimesketch/lib/emojis_test.py -s -pdb
! python3 -m pytest /usr/local/src/timesketch/timesketch/lib/emojis_test.py -s -pdb
```

## end2end tests
Expand Down
21 changes: 18 additions & 3 deletions end_to_end_tests/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,11 @@ def import_timeline(self, filename, index_name=None, sketch=None):
streamer.set_sketch(sketch)
streamer.set_timeline_name(file_path)
streamer.set_index_name(index_name)
streamer.set_provider("e2e test interface")
streamer.add_file(file_path)
timeline = streamer.timeline
if not timeline:
print("Error creating timeline, please try again.")

# Poll the timeline status and wait for the timeline to be ready
max_time_seconds = 600 # Timeout after 10min
Expand All @@ -96,8 +99,18 @@ def import_timeline(self, filename, index_name=None, sketch=None):
while True:
if retry_count >= max_retries:
raise TimeoutError
_ = timeline.lazyload_data(refresh_cache=True)
status = timeline.status

try:
if not timeline:
print("Error no timeline yet, trying to get the new one")
timeline = streamer.timeline
_ = timeline.lazyload_data(refresh_cache=True)
status = timeline.status
except AttributeError:
# The timeline is not ready yet, so we need to wait
retry_count += 1
time.sleep(sleep_time_seconds)
continue

if not timeline.index:
retry_count += 1
Expand All @@ -106,7 +119,9 @@ def import_timeline(self, filename, index_name=None, sketch=None):

if status == "fail" or timeline.index.status == "fail":
if retry_count > 3:
raise RuntimeError("Unable to import timeline.")
raise RuntimeError(
f"Unable to import timeline {timeline.index.id}."
)

if status == "ready" and timeline.index.status == "ready":
break
Expand Down
106 changes: 104 additions & 2 deletions end_to_end_tests/upload_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,108 @@ def test_invalid_index_name(self):
with self.assertions.assertRaises(RuntimeError):
self.import_timeline("evtx.plaso", index_name="/invalid/index/name")

def test_normal_upload_json(self):
    """Upload a small jsonl test-data file and verify the resulting
    timeline's name, index name, index status and event count."""
    # Random suffix keeps sketch and index names unique between runs.
    suffix = random.randint(0, 10000)
    sketch = self.api.create_sketch(name=f"test_normal_upload_json {suffix}")
    self.sketch = sketch

    file_path = (
        "/usr/local/src/timesketch/end_to_end_tests/test_data/sigma_events.jsonl"
    )
    self.import_timeline(file_path, index_name=suffix, sketch=sketch)

    # The freshly imported timeline is the only one on this new sketch.
    timeline = sketch.list_timelines()[0]
    self.assertions.assertEqual(timeline.name, file_path)
    self.assertions.assertEqual(timeline.index.name, str(suffix))
    self.assertions.assertEqual(timeline.index.status, "ready")

    # The test data file contains exactly 4 events.
    events = sketch.explore("*", as_pandas=True)
    self.assertions.assertEqual(len(events), 4)

def test_large_upload_jsonl(self):
    """Test uploading a timeline with a lot of events as jsonl.

    The test creates a temporary file with a large number of events,
    uploads the file to Timesketch and then checks that the number of
    events in the timeline is correct.
    """
    # create a new sketch; random suffix keeps names unique between runs
    rand = random.randint(0, 10000)
    sketch = self.api.create_sketch(name=f"test_large_upload_json {rand}")
    self.sketch = sketch

    file_path = "/tmp/large.jsonl"

    with open(file_path, "w", encoding="utf-8") as file_object:
        for i in range(4123):
            string = f'{{"message":"Count {i} {rand}","timestamp":"123456789","datetime":"2015-07-24T19:01:01+00:00","timestamp_desc":"Write time","data_type":"foobarjson"}}\n'  # pylint: disable=line-too-long
            file_object.write(string)

    try:
        # Reuse file_path (the original repeated the literal path here,
        # which would silently drift if file_path ever changed).
        self.import_timeline(file_path, index_name=rand, sketch=sketch)
    finally:
        # Remove the temp file even when the import fails, so a failed
        # run does not leave stale data behind for later tests.
        os.remove(file_path)

    timeline = sketch.list_timelines()[0]
    # check that timeline was uploaded correctly
    self.assertions.assertEqual(timeline.name, file_path)
    self.assertions.assertEqual(timeline.index.name, str(rand))
    self.assertions.assertEqual(timeline.index.status, "ready")

    search_obj = search.Search(sketch)
    search_obj.query_string = "data_type:foobarjson"
    search_obj.commit()
    self.assertions.assertEqual(len(search_obj.table), 4123)

    # check that the number of events is correct with a different method
    events = sketch.explore("data_type:foobarjson", as_pandas=True)
    self.assertions.assertEqual(len(events), 4123)

def test_very_large_upload_jsonl(self):
    """Test uploading a timeline with over 50k events as jsonl.

    The test creates a temporary file, uploads it to Timesketch and then
    verifies both the default query result cap (10000 rows) and the full
    event count once max_entries is raised.
    """
    # create a new sketch; random suffix keeps names unique between runs
    rand = random.randint(0, 10000)
    # (fixed doubled-underscore typo in the original sketch name)
    sketch = self.api.create_sketch(name=f"test_very_large_upload_json {rand}")
    self.sketch = sketch

    file_path = "/tmp/verylarge.jsonl"

    with open(file_path, "w", encoding="utf-8") as file_object:
        for i in range(74251):
            string = f'{{"message":"Count {i} {rand}","timestamp":"123456789","datetime":"2015-07-24T19:01:01+00:00","timestamp_desc":"Write time","data_type":"foobarjsonverlarge"}}\n'  # pylint: disable=line-too-long
            file_object.write(string)

    try:
        self.import_timeline(file_path, index_name=rand, sketch=sketch)
    finally:
        # Remove the temp file even when the import fails, so a failed
        # run does not leave several MB of stale data behind.
        os.remove(file_path)

    timeline = sketch.list_timelines()[0]
    # check that timeline was uploaded correctly
    self.assertions.assertEqual(timeline.name, file_path)
    self.assertions.assertEqual(timeline.index.name, str(rand))
    self.assertions.assertEqual(timeline.index.status, "ready")

    search_obj = search.Search(sketch)
    search_obj.query_string = "data_type:foobarjsonverlarge"
    search_obj.commit()

    # normal max query limit caps the returned table at 10000 rows
    self.assertions.assertEqual(len(search_obj.table), 10000)
    self.assertions.assertEqual(search_obj.expected_size, 74251)

    # increase max entries returned and re-run to get every event
    search_obj.max_entries = 100000
    search_obj.commit()
    self.assertions.assertEqual(len(search_obj.table), 74251)

    # check that the number of events is correct with a different method
    events = sketch.explore(
        "data_type:foobarjsonverlarge", as_pandas=True, max_entries=100000
    )
    self.assertions.assertEqual(len(events), 74251)

def test_large_upload_csv(self):
"""Test uploading a timeline with an a lot of events.
The test will create a temporary file with a large number of events
Expand All @@ -44,7 +146,7 @@ def test_large_upload_csv(self):

file_path = "/tmp/large.csv"

with open(file_path, "w") as file_object:
with open(file_path, "w", encoding="utf-8") as file_object:
file_object.write(
'"message","timestamp","datetime","timestamp_desc","data_type"\n'
)
Expand Down Expand Up @@ -89,7 +191,7 @@ def test_large_upload_csv_over_flush_limit(self):

file_path = "/tmp/verylarge.csv"

with open(file_path, "w") as file_object:
with open(file_path, "w", encoding="utf-8") as file_object:
file_object.write(
'"message","timestamp","datetime","timestamp_desc","data_type"\n'
)
Expand Down

0 comments on commit d492c87

Please sign in to comment.