Skip to content

Commit

Permalink
[tests] Add jsonl e2e tests (#2976)
Browse files Browse the repository at this point in the history
* fix small doc typo

* add jsonl e2e tests

* pylint
  • Loading branch information
jaegeral authored Nov 8, 2023
1 parent 9d51409 commit d492c87
Show file tree
Hide file tree
Showing 3 changed files with 123 additions and 6 deletions.
2 changes: 1 addition & 1 deletion docs/developers/testing.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ breakpoint()
And then within the docker container execute

```shell
! python3 -m pytest /usr/local/src/timesketchtimesketch/lib/emojis_test.py -s -pdb
! python3 -m pytest /usr/local/src/timesketch/timesketch/lib/emojis_test.py -s -pdb
```

## end2end tests
Expand Down
21 changes: 18 additions & 3 deletions end_to_end_tests/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,11 @@ def import_timeline(self, filename, index_name=None, sketch=None):
streamer.set_sketch(sketch)
streamer.set_timeline_name(file_path)
streamer.set_index_name(index_name)
streamer.set_provider("e2e test interface")
streamer.add_file(file_path)
timeline = streamer.timeline
if not timeline:
print("Error creating timeline, please try again.")

# Poll the timeline status and wait for the timeline to be ready
max_time_seconds = 600 # Timeout after 10min
Expand All @@ -96,8 +99,18 @@ def import_timeline(self, filename, index_name=None, sketch=None):
while True:
if retry_count >= max_retries:
raise TimeoutError
_ = timeline.lazyload_data(refresh_cache=True)
status = timeline.status

try:
if not timeline:
print("Error no timeline yet, trying to get the new one")
timeline = streamer.timeline
_ = timeline.lazyload_data(refresh_cache=True)
status = timeline.status
except AttributeError:
# The timeline is not ready yet, so we need to wait
retry_count += 1
time.sleep(sleep_time_seconds)
continue

if not timeline.index:
retry_count += 1
Expand All @@ -106,7 +119,9 @@ def import_timeline(self, filename, index_name=None, sketch=None):

if status == "fail" or timeline.index.status == "fail":
if retry_count > 3:
raise RuntimeError("Unable to import timeline.")
raise RuntimeError(
f"Unable to import timeline {timeline.index.id}."
)

if status == "ready" and timeline.index.status == "ready":
break
Expand Down
106 changes: 104 additions & 2 deletions end_to_end_tests/upload_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,108 @@ def test_invalid_index_name(self):
with self.assertions.assertRaises(RuntimeError):
self.import_timeline("evtx.plaso", index_name="/invalid/index/name")

def test_normal_upload_json(self):
    """Upload a small jsonl test-data file and verify the resulting
    timeline's name, index name, index status and event count."""
    # Random suffix keeps sketch and index names unique between runs.
    suffix = random.randint(0, 10000)
    sketch = self.api.create_sketch(name=f"test_normal_upload_json {suffix}")
    self.sketch = sketch

    file_path = (
        "/usr/local/src/timesketch/end_to_end_tests/test_data/sigma_events.jsonl"
    )
    self.import_timeline(file_path, index_name=suffix, sketch=sketch)

    # The freshly imported timeline is the only one on this new sketch.
    timeline = sketch.list_timelines()[0]
    self.assertions.assertEqual(timeline.name, file_path)
    self.assertions.assertEqual(timeline.index.name, str(suffix))
    self.assertions.assertEqual(timeline.index.status, "ready")

    # The test data file contains exactly 4 events.
    events = sketch.explore("*", as_pandas=True)
    self.assertions.assertEqual(len(events), 4)

def test_large_upload_jsonl(self):
    """Test uploading a timeline with a lot of events as jsonl.

    The test creates a temporary file with a large number of events,
    uploads the file to Timesketch and then checks that the number of
    events in the timeline is correct.
    """
    # create a new sketch; random suffix keeps names unique between runs
    rand = random.randint(0, 10000)
    sketch = self.api.create_sketch(name=f"test_large_upload_json {rand}")
    self.sketch = sketch

    file_path = "/tmp/large.jsonl"

    with open(file_path, "w", encoding="utf-8") as file_object:
        for i in range(4123):
            string = f'{{"message":"Count {i} {rand}","timestamp":"123456789","datetime":"2015-07-24T19:01:01+00:00","timestamp_desc":"Write time","data_type":"foobarjson"}}\n'  # pylint: disable=line-too-long
            file_object.write(string)

    try:
        # Reuse file_path (the original repeated the literal path here,
        # which would silently drift if file_path ever changed).
        self.import_timeline(file_path, index_name=rand, sketch=sketch)
    finally:
        # Remove the temp file even when the import fails, so a failed
        # run does not leave stale data behind for later tests.
        os.remove(file_path)

    timeline = sketch.list_timelines()[0]
    # check that timeline was uploaded correctly
    self.assertions.assertEqual(timeline.name, file_path)
    self.assertions.assertEqual(timeline.index.name, str(rand))
    self.assertions.assertEqual(timeline.index.status, "ready")

    search_obj = search.Search(sketch)
    search_obj.query_string = "data_type:foobarjson"
    search_obj.commit()
    self.assertions.assertEqual(len(search_obj.table), 4123)

    # check that the number of events is correct with a different method
    events = sketch.explore("data_type:foobarjson", as_pandas=True)
    self.assertions.assertEqual(len(events), 4123)

def test_very_large_upload_jsonl(self):
    """Test uploading a timeline with over 50k events as jsonl.

    The test creates a temporary file, uploads it to Timesketch and then
    verifies both the default query result cap (10000 rows) and the full
    event count once max_entries is raised.
    """
    # create a new sketch; random suffix keeps names unique between runs
    rand = random.randint(0, 10000)
    # (fixed doubled-underscore typo in the original sketch name)
    sketch = self.api.create_sketch(name=f"test_very_large_upload_json {rand}")
    self.sketch = sketch

    file_path = "/tmp/verylarge.jsonl"

    with open(file_path, "w", encoding="utf-8") as file_object:
        for i in range(74251):
            string = f'{{"message":"Count {i} {rand}","timestamp":"123456789","datetime":"2015-07-24T19:01:01+00:00","timestamp_desc":"Write time","data_type":"foobarjsonverlarge"}}\n'  # pylint: disable=line-too-long
            file_object.write(string)

    try:
        self.import_timeline(file_path, index_name=rand, sketch=sketch)
    finally:
        # Remove the temp file even when the import fails, so a failed
        # run does not leave several MB of stale data behind.
        os.remove(file_path)

    timeline = sketch.list_timelines()[0]
    # check that timeline was uploaded correctly
    self.assertions.assertEqual(timeline.name, file_path)
    self.assertions.assertEqual(timeline.index.name, str(rand))
    self.assertions.assertEqual(timeline.index.status, "ready")

    search_obj = search.Search(sketch)
    search_obj.query_string = "data_type:foobarjsonverlarge"
    search_obj.commit()

    # normal max query limit caps the returned table at 10000 rows
    self.assertions.assertEqual(len(search_obj.table), 10000)
    self.assertions.assertEqual(search_obj.expected_size, 74251)

    # increase max entries returned and re-run to get every event
    search_obj.max_entries = 100000
    search_obj.commit()
    self.assertions.assertEqual(len(search_obj.table), 74251)

    # check that the number of events is correct with a different method
    events = sketch.explore(
        "data_type:foobarjsonverlarge", as_pandas=True, max_entries=100000
    )
    self.assertions.assertEqual(len(events), 74251)

def test_large_upload_csv(self):
"""Test uploading a timeline with an a lot of events.
The test will create a temporary file with a large number of events
Expand All @@ -44,7 +146,7 @@ def test_large_upload_csv(self):

file_path = "/tmp/large.csv"

with open(file_path, "w") as file_object:
with open(file_path, "w", encoding="utf-8") as file_object:
file_object.write(
'"message","timestamp","datetime","timestamp_desc","data_type"\n'
)
Expand Down Expand Up @@ -89,7 +191,7 @@ def test_large_upload_csv_over_flush_limit(self):

file_path = "/tmp/verylarge.csv"

with open(file_path, "w") as file_object:
with open(file_path, "w", encoding="utf-8") as file_object:
file_object.write(
'"message","timestamp","datetime","timestamp_desc","data_type"\n'
)
Expand Down

0 comments on commit d492c87

Please sign in to comment.