Skip to content

Commit

Permalink
fix: handle JSON character encoding for Splunk HEC (#112)
Browse files Browse the repository at this point in the history
* fix: handle JSON character encoding for hec (Umlauts)

After applying the fix searching for a field with Umlauts is now working correctly.

* style: pre-commit

* test: update integration tests to include fixed scenario

* test: sleep before search

Co-authored-by: Artem Rys <[email protected]>
  • Loading branch information
zszia and artemrys authored Nov 14, 2022
1 parent 01c28c4 commit 699e387
Show file tree
Hide file tree
Showing 6 changed files with 88 additions and 4 deletions.
1 change: 1 addition & 0 deletions .github/workflows/build-test-release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ jobs:
test-splunk:
name: test-splunk
runs-on: ubuntu-latest
continue-on-error: true
needs:
- meta
strategy:
Expand Down
2 changes: 1 addition & 1 deletion solnlib/modular_input/event.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ def _to_hec(self, event_field):
if hasattr(self, "_fields"):
event["fields"] = self._fields

return json.dumps(event)
return json.dumps(event, ensure_ascii=False)

@classmethod
def format_events(cls, events: List, event_field: str = "event") -> List:
Expand Down
2 changes: 1 addition & 1 deletion solnlib/modular_input/event_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -442,7 +442,7 @@ def write_events(
try:
self._rest_client.post(
self.HTTP_EVENT_COLLECTOR_ENDPOINT,
body=event,
body=event.encode("utf-8"),
headers=self.headers,
)
except binding.HTTPError as e:
Expand Down
49 changes: 49 additions & 0 deletions tests/integration/_search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
#
# Copyright 2021 Splunk Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import os.path as op
import sys
import time

sys.path.insert(0, op.dirname(op.dirname(op.abspath(__file__))))
import context
from splunklib import client
from splunklib import results as splunklib_results


def search(session_key, query):
service = client.connect(host=context.host, token=session_key)
job = service.jobs.create(query)
while True:
while not job.is_ready():
pass
stats = {
"isDone": job["isDone"],
"doneProgress": job["doneProgress"],
"scanCount": job["scanCount"],
"eventCount": job["eventCount"],
"resultCount": job["resultCount"],
}
if stats["isDone"] == "1":
break
time.sleep(0.5)
json_results_reader = splunklib_results.JSONResultsReader(
job.results(output_mode="json")
)
results = []
for result in json_results_reader:
if isinstance(result, dict):
results.append(result)
return results
35 changes: 34 additions & 1 deletion tests/integration/test_hec_event_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#

import os.path as op
import sys
import time

sys.path.insert(0, op.dirname(op.dirname(op.abspath(__file__))))
import context
from _search import search

from solnlib.modular_input import event_writer as hew

Expand All @@ -36,3 +37,35 @@ def test_hec_event_writer():
m2[i] = "test2 data %s" % i
e2 = ew.create_event(m2, index="main", host="testing", sourcetype="hec")
ew.write_events([e1, e2])


def test_hec_event_writes_with_non_utf_8():
# To test scenario listed in https://github.com/splunk/addonfactory-solutions-library-python/pull/112.
test_name = "test_hec_event_writes_with_non_utf_8"
session_key = context.get_session_key()
ew = hew.HECEventWriter("test", session_key)
event = ew.create_event(
[
{
"test_name": test_name,
"field_a": "Üü_Öö_Ää_some_text",
"field_b": "some_text_Üü_Öö_Ää",
},
],
index="main",
host="testing",
sourcetype="hec",
)
ew.write_events([event])
time.sleep(2)

search_results = search(
session_key, f"search index=main sourcetype=hec {test_name}"
)

assert len(search_results) == 1
_raw_event = search_results[0]["_raw"]
assert "Üü_Öö_Ää_some_text" in _raw_event
assert "some_text_Üü_Öö_Ää" in _raw_event
assert "\\u00dc\\u00fc_\\u00d6\\u00f6_\\u00c4\\u00e4_some_text" not in _raw_event
assert "some_text_\\u00dc\\u00fc_\\u00d6\\u00f6_\\u00c4\\u00e4" not in _raw_event
3 changes: 2 additions & 1 deletion tests/unit/test_modular_input_event_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,8 @@ def mock_post(
self, path_segment, owner=None, app=None, sharing=None, headers=None, **query
):
event_strings = [
json.dumps(json.loads(e), sort_keys=True) for e in query["body"].split("\n")
json.dumps(json.loads(e), sort_keys=True)
for e in query["body"].decode("utf-8").split("\n")
]

assert (
Expand Down

0 comments on commit 699e387

Please sign in to comment.