Skip to content

Commit

Permalink
[git] Fix parsing error when empty logs are read from a repository
Browse files Browse the repository at this point in the history
Fixes #17
  • Loading branch information
sduenas committed Mar 10, 2016
1 parent 1917bcd commit 55673d7
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 4 deletions.
25 changes: 21 additions & 4 deletions perceval/backends/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,10 @@ class GitParser:
The commit ends with an empty line.
Take into account that one empty line is valid at the beginning
of the log. This allows empty logs to be parsed without raising
exceptions.
This example was generated using the following command:
git log --raw --numstat --pretty=fuller --decorate=full \
Expand Down Expand Up @@ -332,21 +336,23 @@ class GitParser:
GIT_NEXT_STATE_REGEXP = re.compile(EMPTY_LINE_PATTERN, re.VERBOSE)

# Git parser status
(COMMIT,
(INIT,
COMMIT,
HEADER,
MESSAGE,
FILE) = range(4)
FILE) = range(5)

def __init__(self, stream):
self.stream = stream
self.nline = 0
self.state = self.COMMIT
self.state = self.INIT

# Aux vars to store the commit that is being parsed
self.commit = None
self.commit_files = {}

self.handlers = {
self.INIT : self._handle_init,
self.COMMIT : self._handle_commit,
self.HEADER : self._handle_header,
self.MESSAGE : self._handle_message,
Expand All @@ -364,7 +370,7 @@ def parse(self):
while not parsed:
parsed = self.handlers[self.state](line)

if self.state == self.COMMIT:
if self.state == self.COMMIT and self.commit:
commit = self._build_commit()
logger.debug("Commit %s parsed", commit['commit'])
yield commit
Expand All @@ -389,6 +395,17 @@ def remove_none_values(d):

return commit

def _handle_init(self, line):
    """Handle the first line of the log.

    Whatever the line contains, the parser moves on to the COMMIT
    state. The return value tells the caller whether the line was
    consumed here: True when it matched the empty-line pattern,
    False when the same line still has to be processed by the
    handler of the new state.
    """
    is_empty_line = self.GIT_NEXT_STATE_REGEXP.match(line) is not None

    # The transition is unconditional; only the returned flag differs
    self.state = self.COMMIT

    return is_empty_line

def _handle_commit(self, line):
m = self.GIT_COMMIT_REGEXP.match(line)
if not m:
Expand Down
1 change: 1 addition & 0 deletions tests/data/git_log_empty.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

33 changes: 33 additions & 0 deletions tests/test_git.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,17 @@ def test_fetch_since_date(self):

shutil.rmtree(new_path)

def test_fetch_empty_log(self):
    """Test whether it parses an empty log"""

    new_path = os.path.join(self.tmp_path, 'newgit')

    # The assertion below expects no commits from this date onwards
    from_date = datetime.datetime(2020, 1, 1, 1, 1, 1)
    git = Git(self.git_path, new_path)

    try:
        commits = list(git.fetch(from_date=from_date))
        self.assertListEqual(commits, [])
    finally:
        # Other tests reuse the 'newgit' path, so the working copy
        # must not outlive this test. Errors are ignored so that a
        # cleanup problem never hides an assertion failure.
        shutil.rmtree(new_path, ignore_errors=True)

def test_fetch_from_file(self):
"""Test whether commits are fetched from a Git log file"""

Expand Down Expand Up @@ -282,6 +293,15 @@ def test_parser(self):
}
self.assertDictEqual(commits[5], expected)

def test_parser_empty_log(self):
    """Test if it parses an empty git log stream"""

    with open("data/git_log_empty.txt", 'r') as stream:
        # Drain the generator while the file is still open
        commits = list(GitParser(stream).parse())

    self.assertListEqual(commits, [])

def test_commit_pattern(self):
"""Test commit pattern"""

Expand Down Expand Up @@ -548,6 +568,19 @@ def test_log_from_date(self):

shutil.rmtree(new_path)

def test_log_empty(self):
    """Test if a single empty line is returned when the log is empty"""

    new_path = os.path.join(self.tmp_path, 'newgit')
    since = datetime.datetime(2020, 1, 1, 1, 1, 1)

    repo = GitRepository.clone(self.git_path, new_path)
    lines = list(repo.log(from_date=since))

    # An empty log is expected to be exactly one empty line
    self.assertListEqual(lines, [''])

    shutil.rmtree(new_path)


if __name__ == "__main__":
unittest.main()

0 comments on commit 55673d7

Please sign in to comment.