From b6f01c2279487342471e8dafbe6fc851c2304202 Mon Sep 17 00:00:00 2001
From: Tamas Papik <trapacska@gmail.com>
Date: Fri, 12 Jul 2019 10:22:42 +0200
Subject: [PATCH] TOOL-739 backspace char (#95)

* TOOL-739 added illegal character filter

* TOOL-739 comment fixture

* added unit test
---
 test/converters/xcresult/xcresult.go      | 31 +++++++++++++++++++++++
 test/converters/xcresult/xcresult_test.go | 15 +++++++++++
 2 files changed, 46 insertions(+)
 create mode 100644 test/converters/xcresult/xcresult_test.go

diff --git a/test/converters/xcresult/xcresult.go b/test/converters/xcresult/xcresult.go
index 8f02141a..a0e4fc88 100644
--- a/test/converters/xcresult/xcresult.go
+++ b/test/converters/xcresult/xcresult.go
@@ -2,6 +2,8 @@ package xcresult
 
 import (
 	"path/filepath"
+	"strings"
+	"unicode"
 
 	"github.com/bitrise-io/go-utils/fileutil"
 	"github.com/bitrise-io/go-utils/pathutil"
@@ -32,6 +34,33 @@ func (h *Converter) Detect(files []string) bool {
 	return false
 }
 
+// filterIllegalChars returns data without the characters that XML 1.0 forbids
+// or discourages, so the XML unmarshaller no longer fails on input such as the
+// backspace char (U+0008) from one of our issue reports.
+// The legal character ranges are here: https://www.w3.org/TR/REC-xml/#charsets
+//
+// Removed: U+0000-U+0008, U+000B, U+000C, U+000E-U+001F, U+007F-U+0084,
+// U+0086-U+009F and the non-characters U+FDD0-U+FDEF, U+FFFE, U+FFFF.
+// Kept: tab, line feed, carriage return, U+0085 and everything else; a plain
+// printability check would also strip those legal characters and thereby
+// alter multi-line values.
+// Surrogates (U+D800-U+DFFF) never reach the filter: they are not valid UTF-8,
+// and strings.Map turns invalid bytes into U+FFFD, which is a legal character.
+func filterIllegalChars(data []byte) (filtered []byte) {
+	illegalCharFilter := func(r rune) rune {
+		switch {
+		case r == '\t', r == '\n', r == '\r', r == '\u0085':
+			return r // control characters that XML allows
+		case unicode.IsControl(r):
+			return -1 // every other C0/C1 control, including backspace
+		case r >= '\uFDD0' && r <= '\uFDEF', r == '\uFFFE', r == '\uFFFF':
+			return -1 // non-characters
+		}
+		return r
+	}
+	return []byte(strings.Map(illegalCharFilter, string(data)))
+}
+
 // XML ...
 func (h *Converter) XML() (junit.XML, error) {
 	data, err := fileutil.ReadBytesFromFile(h.testSummariesPlistPath)
@@ -39,6 +68,8 @@ func (h *Converter) XML() (junit.XML, error) {
 		return junit.XML{}, err
 	}
 
+	data = filterIllegalChars(data)
+
 	var plistData TestSummaryPlist
 	if _, err := plist.Unmarshal(data, &plistData); err != nil {
 		return junit.XML{}, err
diff --git a/test/converters/xcresult/xcresult_test.go b/test/converters/xcresult/xcresult_test.go
new file mode 100644
index 00000000..a36158b1
--- /dev/null
+++ b/test/converters/xcresult/xcresult_test.go
@@ -0,0 +1,15 @@
+package xcresult
+
+import (
+	"reflect"
+	"testing"
+)
+
+func Test_filterIllegalChars(t *testing.T) {
+	// "\b" is U+0008 (backspace), which must be stripped from the input.
+	input, want := []byte("test\b text"), []byte("test text")
+
+	if got := filterIllegalChars(input); !reflect.DeepEqual(got, want) {
+		t.Fatal("illegal character is not removed")
+	}
+}