diff --git a/perceval/backends/git.py b/perceval/backends/git.py
index c6d739d9d..34c35adf7 100644
--- a/perceval/backends/git.py
+++ b/perceval/backends/git.py
@@ -296,6 +296,10 @@ class GitParser:
 
     The commit ends with an empty line.
 
+    Take into account that one empty line is valid at the beginning
+    of the log. This allows empty logs to be parsed without raising
+    exceptions.
+
     This example was generated using the next command:
 
         git log --raw --numstat --pretty=fuller --decorate=full \
@@ -332,21 +336,23 @@ class GitParser:
     GIT_NEXT_STATE_REGEXP = re.compile(EMPTY_LINE_PATTERN, re.VERBOSE)
 
     # Git parser status
-    (COMMIT,
+    (INIT,
+     COMMIT,
      HEADER,
      MESSAGE,
-     FILE) = range(4)
+     FILE) = range(5)
 
     def __init__(self, stream):
         self.stream = stream
         self.nline = 0
-        self.state = self.COMMIT
+        self.state = self.INIT
 
         # Aux vars to store the commit that is being parsed
         self.commit = None
         self.commit_files = {}
 
         self.handlers = {
+                         self.INIT : self._handle_init,
                          self.COMMIT : self._handle_commit,
                          self.HEADER : self._handle_header,
                          self.MESSAGE : self._handle_message,
@@ -364,7 +370,7 @@ def parse(self):
             while not parsed:
                 parsed = self.handlers[self.state](line)
 
-                if self.state == self.COMMIT:
+                if self.state == self.COMMIT and self.commit:
                     commit = self._build_commit()
                     logger.debug("Commit %s parsed", commit['commit'])
                     yield commit
@@ -389,6 +395,17 @@ def remove_none_values(d):
 
         return commit
 
+    def _handle_init(self, line):
+        m = self.GIT_NEXT_STATE_REGEXP.match(line)
+
+        # In both cases, the parser advances to the next state.
+        # It only has to check whether the line has to be parsed
+        # again or not
+        self.state = self.COMMIT
+        parsed = m is not None
+
+        return parsed
+
     def _handle_commit(self, line):
         m = self.GIT_COMMIT_REGEXP.match(line)
         if not m:
diff --git a/tests/data/git_log_empty.txt b/tests/data/git_log_empty.txt
new file mode 100644
index 000000000..8b1378917
--- /dev/null
+++ b/tests/data/git_log_empty.txt
@@ -0,0 +1 @@
+
diff --git a/tests/test_git.py b/tests/test_git.py
index d3781b2eb..f26097014 100644
--- a/tests/test_git.py
+++ b/tests/test_git.py
@@ -130,6 +130,19 @@ def test_fetch_since_date(self):
 
         shutil.rmtree(new_path)
 
+    def test_fetch_empty_log(self):
+        """Test whether it parses an empty log"""
+
+        new_path = os.path.join(self.tmp_path, 'newgit')
+
+        from_date = datetime.datetime(2020, 1, 1, 1, 1, 1)
+        git = Git(self.git_path, new_path)
+        commits = [commit for commit in git.fetch(from_date=from_date)]
+
+        self.assertListEqual(commits, [])
+
+        shutil.rmtree(new_path)
+
     def test_fetch_from_file(self):
         """Test whether commits are fetched from a Git log file"""
 
@@ -282,6 +295,15 @@ def test_parser(self):
         }
         self.assertDictEqual(commits[5], expected)
 
+    def test_parser_empty_log(self):
+        """Test if it parses an empty git log stream"""
+
+        with open("data/git_log_empty.txt", 'r') as f:
+            parser = GitParser(f)
+            commits = [commit for commit in parser.parse()]
+
+        self.assertListEqual(commits, [])
+
     def test_commit_pattern(self):
         """Test commit pattern"""
 
@@ -548,6 +570,19 @@ def test_log_from_date(self):
 
         shutil.rmtree(new_path)
 
+    def test_log_empty(self):
+        """Test if an empty line is returned when the log is empty"""
+
+        new_path = os.path.join(self.tmp_path, 'newgit')
+
+        repo = GitRepository.clone(self.git_path, new_path)
+        gitlog = repo.log(from_date=datetime.datetime(2020, 1, 1, 1, 1, 1))
+        gitlog = [line for line in gitlog]
+
+        self.assertListEqual(gitlog, [''])
+
+        shutil.rmtree(new_path)
+
 
 if __name__ == "__main__":
     unittest.main()