From ab2f67d7ee4eba2e40ad7bd215d6f0a050ab11aa Mon Sep 17 00:00:00 2001 From: Dan Kortschak <90160302+efd6@users.noreply.github.com> Date: Mon, 4 Sep 2023 18:52:52 +0930 Subject: [PATCH] x-pack/filebeat/input/httpjson: make response body decoding errors more informative (#36481) The default rendering of errors from the json, csv and xml decoders can be a little terse, but the error values themselves contain information that allows the text context of the error to be constructed and returned. It is a common problem that users are unable to decipher the error messages, so add this text context to help. --- CHANGELOG.next.asciidoc | 1 + x-pack/filebeat/input/httpjson/encoding.go | 112 ++++++++++- .../filebeat/input/httpjson/encoding_test.go | 189 ++++++++++++++++-- 3 files changed, 277 insertions(+), 25 deletions(-) diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index 2f996756908..88c9c2099f8 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -208,6 +208,7 @@ https://github.com/elastic/beats/compare/v8.8.1\...main[Check the HEAD diff] - [Azure] Add input metrics to the azure-eventhub input. {pull}35739[35739] - Reduce HTTPJSON metrics allocations. {pull}36282[36282] - Add support for a simplified input configuraton when running under Elastic-Agent {pull}36390[36390] +- Make HTTPJSON response body decoding errors more informative. {pull}36481[36481] *Auditbeat* diff --git a/x-pack/filebeat/input/httpjson/encoding.go b/x-pack/filebeat/input/httpjson/encoding.go index 7bf85161789..5dd62f10535 100644 --- a/x-pack/filebeat/input/httpjson/encoding.go +++ b/x-pack/filebeat/input/httpjson/encoding.go @@ -9,9 +9,12 @@ import ( "bytes" "encoding/csv" "encoding/json" + stdxml "encoding/xml" "errors" + "fmt" "io" "net/http" + "unicode" "github.com/elastic/mito/lib/xml" ) @@ -72,7 +75,11 @@ func encodeAsJSON(trReq transformable) ([]byte, error) { // decodeAsJSON decodes the JSON message in p into dst. func decodeAsJSON(p []byte, dst *response) error { - return json.Unmarshal(p, &dst.body) + err := json.Unmarshal(p, &dst.body) + if err != nil { + return textContextError{error: err, body: p} + } + return nil } // encodeAsForm encodes trReq as a URL encoded form. @@ -95,7 +102,7 @@ func decodeAsNdjson(p []byte, dst *response) error { for dec.More() { var o interface{} if err := dec.Decode(&o); err != nil { - return err + return textContextError{error: err, body: p} } results = append(results, o) } @@ -135,7 +142,7 @@ func decodeAsCSV(p []byte, dst *response) error { if err != nil { if err != io.EOF { //nolint:errorlint // csv.Reader never wraps io.EOF. - return err + return textContextError{error: err, body: p} } } @@ -165,7 +172,7 @@ func decodeAsZip(p []byte, dst *response) error { var o interface{} if err := dec.Decode(&o); err != nil { rc.Close() - return err + return textContextError{error: err, body: p} } results = append(results, o) } @@ -185,9 +192,104 @@ func decodeAsZip(p []byte, dst *response) error { func decodeAsXML(p []byte, dst *response) error { cdata, body, err := xml.Unmarshal(bytes.NewReader(p), dst.xmlDetails) if err != nil { - return err + return textContextError{error: err, body: p} } dst.body = body dst.header["XML-CDATA"] = []string{cdata} return nil } + +// textContextError is an error that can provide the text context for +// a decoding error from the csv, json and xml packages. +type textContextError struct { + error + body []byte +} + +func (e textContextError) Error() string { + var ctx []byte + switch err := e.error.(type) { + case nil: + return "" + case *json.SyntaxError: + ctx = textContext(e.body, err.Offset) + case *json.UnmarshalTypeError: + ctx = textContext(e.body, err.Offset) + case *csv.ParseError: + lines := bytes.Split(e.body, []byte{'\n'}) + l := err.Line - 1 // Lines are 1-based. + if uint(l) >= uint(len(lines)) { + return err.Error() + } + ctx = textContext(lines[l], int64(err.Column)) + case *stdxml.SyntaxError: + lines := bytes.Split(e.body, []byte{'\n'}) + l := err.Line - 1 // Lines are 1-based. + if uint(l) >= uint(len(lines)) { + return err.Error() + } + // The xml package does not provide column-level context, + // so just point to first non-whitespace character of the + // line. This doesn't make a great deal of difference + // except in deeply indented XML documents. + pos := bytes.IndexFunc(lines[l], func(r rune) bool { + return !unicode.IsSpace(r) + }) + if pos < 0 { + pos = 0 + } + ctx = textContext(lines[l], int64(pos)) + default: + return err.Error() + } + return fmt.Sprintf("%v: text context %q", e.error, ctx) +} + +func (e textContextError) Unwrap() error { + return e.error +} + +// textContext returns the context of text around the provided position starting +// ten bytes before pos and ten bytes after, dependent on the length of the +// text and the value of pos relative to bounds. If a text truncation is made, +// an ellipsis is added to indicate this. +func textContext(text []byte, pos int64) []byte { + if len(text) == 0 { + return text + } + const ( + dots = "..." + span = 10 + ) + left := maxInt64(0, pos-span) + right := minInt(pos+span+1, int64(len(text))) + ctx := make([]byte, right-left+2*int64(len(dots))) + copy(ctx[3:], text[left:right]) + if left != 0 { + copy(ctx, dots) + left = 0 + } else { + left = int64(len(dots)) + } + if right != int64(len(text)) { + copy(ctx[len(ctx)-len(dots):], dots) + right = int64(len(ctx)) + } else { + right = int64(len(ctx) - len(dots)) + } + return ctx[left:right] +} + +func minInt(a, b int64) int64 { + if a < b { + return a + } + return b +} + +func maxInt64(a, b int64) int64 { + if a > b { + return a + } + return b +} diff --git a/x-pack/filebeat/input/httpjson/encoding_test.go b/x-pack/filebeat/input/httpjson/encoding_test.go index 1a835bfd941..c42e8c65588 100644 --- a/x-pack/filebeat/input/httpjson/encoding_test.go +++ b/x-pack/filebeat/input/httpjson/encoding_test.go @@ -8,6 +8,7 @@ import ( "archive/zip" "bytes" "encoding/json" + "net/http" "net/url" "testing" @@ -61,29 +62,103 @@ func TestDecodeZip(t *testing.T) { assert.Equal(t, []string{"a.json", "b.ndjson", "c.ndjson"}, resp.header["X-Zip-Files"]) } +func TestDecodeJSON(t *testing.T) { + tests := []struct { + body string + result string + err string + }{ + { + body: "{}", + result: "{}", + }, + { + body: "{\"a\":\"b\"}", + result: "{\"a\":\"b\"}", + }, + { + body: "[{\"a\":\"b\"},\nunfortunate text\n{\"c\":\"d\"}]", + err: `invalid character 'u' looking for beginning of value: text context "...a\":\"b\"},\nunfortunate ..."`, + }, + } + for _, test := range tests { + resp := &response{} + err := decodeAsJSON([]byte(test.body), resp) + if test.err != "" { + assert.Error(t, err) + assert.EqualError(t, err, test.err) + } else { + assert.NoError(t, err) + + var j []byte + if test.body != "" { + j, err = json.Marshal(resp.body) + if err != nil { + t.Fatalf("Marshal failed: %v", err) + } + assert.JSONEq(t, test.result, string(j)) + } else { + assert.Equal(t, test.result, string(j)) + } + } + } +} + func TestDecodeNdjson(t *testing.T) { tests := []struct { body string result string + err string }{ - {"{}", "[{}]"}, - {"{\"a\":\"b\"}", "[{\"a\":\"b\"}]"}, - {"{\"a\":\"b\"}\n{\"c\":\"d\"}", "[{\"a\":\"b\"},{\"c\":\"d\"}]"}, - {"{\"a\":\"b\"}\r\n{\"c\":\"d\"}", "[{\"a\":\"b\"},{\"c\":\"d\"}]"}, - {"{\"a\":\"b\"}\r\n{\"c\":\"d\"}\n", "[{\"a\":\"b\"},{\"c\":\"d\"}]"}, - {"{\"a\":\"b\"}\r\n{\"c\":\"d\"}\r\n", "[{\"a\":\"b\"},{\"c\":\"d\"}]"}, + { + body: "{}", + result: "[{}]", + }, + { + body: "{\"a\":\"b\"}", + result: "[{\"a\":\"b\"}]", + }, + { + body: "{\"a\":\"b\"}\n{\"c\":\"d\"}", + result: "[{\"a\":\"b\"},{\"c\":\"d\"}]", + }, + { + body: "{\"a\":\"b\"}\r\n{\"c\":\"d\"}", + result: "[{\"a\":\"b\"},{\"c\":\"d\"}]", + }, + { + body: "{\"a\":\"b\"}\r\n{\"c\":\"d\"}\n", + result: "[{\"a\":\"b\"},{\"c\":\"d\"}]", + }, + { + body: "{\"a\":\"b\"}\r\n{\"c\":\"d\"}\r\n", + result: "[{\"a\":\"b\"},{\"c\":\"d\"}]", + }, + { + body: "{\"a\":\"b\"}unfortunate text\r\n{\"c\":\"d\"}\r\n", + err: `invalid character 'u' looking for beginning of value: text context "{\"a\":\"b\"}unfortunate ..."`, + }, } for _, test := range tests { resp := &response{} err := decodeAsNdjson([]byte(test.body), resp) - if err != nil { - t.Fatalf("decodeAsNdjson failed: %v", err) - } - j, err := json.Marshal(resp.body) - if err != nil { - t.Fatalf("Marshal failed: %v", err) + if test.err != "" { + assert.Error(t, err) + assert.EqualError(t, err, test.err) + } else { + assert.NoError(t, err) + + var j []byte + if test.body != "" { + j, err = json.Marshal(resp.body) + if err != nil { + t.Fatalf("Marshal failed: %v", err) + } + assert.JSONEq(t, test.result, string(j)) + } else { + assert.Equal(t, test.result, string(j)) + } } - assert.Equal(t, test.result, string(j)) } } @@ -93,20 +168,18 @@ func TestDecodeCSV(t *testing.T) { result string err string }{ - {"", "", ""}, + {body: "", result: ""}, { - "EVENT_TYPE,TIMESTAMP,REQUEST_ID,ORGANIZATION_ID,USER_ID\n" + + body: "EVENT_TYPE,TIMESTAMP,REQUEST_ID,ORGANIZATION_ID,USER_ID\n" + "Login,20211018071353.465,id1,id2,user1\n" + "Login,20211018071505.579,id4,id5,user2\n", - `[{"EVENT_TYPE":"Login","TIMESTAMP":"20211018071353.465","REQUEST_ID":"id1","ORGANIZATION_ID":"id2","USER_ID":"user1"}, + result: `[{"EVENT_TYPE":"Login","TIMESTAMP":"20211018071353.465","REQUEST_ID":"id1","ORGANIZATION_ID":"id2","USER_ID":"user1"}, {"EVENT_TYPE":"Login","TIMESTAMP":"20211018071505.579","REQUEST_ID":"id4","ORGANIZATION_ID":"id5","USER_ID":"user2"}]`, - "", }, { - "EVENT_TYPE,TIMESTAMP,REQUEST_ID,ORGANIZATION_ID,USER_ID\n" + + body: "EVENT_TYPE,TIMESTAMP,REQUEST_ID,ORGANIZATION_ID,USER_ID\n" + "Login,20211018071505.579,id4,user2\n", - "", - "record on line 2: wrong number of fields", + err: "record on line 2: wrong number of fields: text context \"Login,202110...\"", }, } for _, test := range tests { @@ -132,6 +205,62 @@ func TestDecodeCSV(t *testing.T) { } } +func TestDecodeXML(t *testing.T) { + tests := []struct { + body string + result string + err string + }{ + { + body: ` + +

+ Joord Lennart +

+ + Egil's Saga + +
+`, + result: `{"o":{"p":{"n":"Joord Lennart"},"i":{"n":"Egil's Saga"}}}`, + }, + { + body: ` + +

+ Joord Lennart +

+ + Egil's Saga + +
+`, + err: `XML syntax error on line 7: element closed by : text context "... Egil's S..."`, + }, + } + for _, test := range tests { + resp := &response{header: make(http.Header)} + err := decodeAsXML([]byte(test.body), resp) + if test.err != "" { + assert.Error(t, err) + assert.EqualError(t, err, test.err) + } else { + assert.NoError(t, err) + + var j []byte + if test.body != "" { + j, err = json.Marshal(resp.body) + if err != nil { + t.Fatalf("Marshal failed: %v", err) + } + assert.JSONEq(t, test.result, string(j)) + } else { + assert.Equal(t, test.result, string(j)) + } + } + } +} + func TestEncodeAsForm(t *testing.T) { tests := []struct { params map[string]string @@ -159,3 +288,23 @@ func TestEncodeAsForm(t *testing.T) { assert.Equal(t, "application/x-www-form-urlencoded", trReq.header().Get("Content-Type")) } } + +func TestTextContext(t *testing.T) { + tests := []struct { + text string + pos int64 + want string + }{ + {}, + {text: "0987654321*1234567890", pos: 10, want: "0987654321*1234567890"}, + {text: "54321*1234567890xxxxx", pos: 5, want: "54321*1234567890..."}, + {text: "xxxxx0987654321*12345", pos: 15, want: "...0987654321*12345"}, + {text: "x0987654321*1234567890x", pos: 11, want: "...0987654321*1234567890..."}, + } + for _, test := range tests { + got := string(textContext([]byte(test.text), test.pos)) + if got != test.want { + t.Errorf("unexpected result for textContext(%q, %d): got:%q want:%q", test.text, test.pos, got, test.want) + } + } +}