-
Notifications
You must be signed in to change notification settings - Fork 177
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[git] Use "backslashreplace" instead of "surrogateescape".
When decoding as utf8, if a character cannot be decoded, use the backslashreplace error handler instead of the surrogateescape error handler. Fixes #18 for the git backend; other backends may need the same fix.
- Loading branch information
Showing
3 changed files
with
30 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
commit c4c8ea948aa21527d502e87227b2f1d951bc506d d69332b875efb52ea5276d5638ce572fcd7375f2 | ||
Author: Jason Gaston <[email protected]> | ||
AuthorDate: Sat Apr 16 15:24:43 2005 -0700 | ||
Commit: Linus Torvalds <[email protected]> | ||
CommitDate: Sat Apr 16 15:24:43 2005 -0700 | ||
|
||
[PATCH] intel8x0: AC'97 audio patch for Intel ESB2 | ||
|
||
Signed-off-by: �Jason Gaston <[email protected]> | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -204,6 +204,22 @@ def test_git_encoding_error(self): | |
self.assertEqual(commit['commit'], 'cb24e4f2f7b2a7f3450bfb15d1cbaa97371e93fb') | ||
self.assertEqual(commit['message'], 'Calling \udc93Open Type\udc94 (CTRL+SHIFT+T) after startup - performance improvement.') | ||
|
||
def test_git_utf8_error(self): | ||
"""Characters that cannot be decoded as utf8 can be later encoded as utf8. | ||
This test raised the following exception before being fixed: | ||
"UnicodeEncodeError: 'utf-8' codec can't encode character '\udca0' | ||
in position 153: surrogates not allowed" | ||
""" | ||
|
||
message_ok = b"[PATCH] intel8x0: AC'97 audio patch for Intel ESB2\n" \ | ||
+ b"\nSigned-off-by: \\xa0Jason Gaston <[email protected]>" | ||
|
||
commits = Git.parse_git_log_from_file("data/git_bad_utf8.txt") | ||
commit = [commit for commit in commits][0] | ||
self.assertEqual(commit['message'].encode('utf8'), message_ok) | ||
|
||
def test_git_parser_from_iter(self): | ||
"""Test if the static method parses a git log from a repository""" | ||
|
||
|