From 042af9f606f9c58ee8dab97bb25423ac70c294d1 Mon Sep 17 00:00:00 2001 From: Jaspar Stach Date: Tue, 14 Nov 2023 13:27:32 +0100 Subject: [PATCH 1/5] Fix: Find Copyright headers with old company --- pontos/updateheader/updateheader.py | 49 +++++++++++++++++------- tests/updateheader/test_header.py | 59 +++++++++++++++++++++-------- 2 files changed, 78 insertions(+), 30 deletions(-) diff --git a/pontos/updateheader/updateheader.py b/pontos/updateheader/updateheader.py index b17e46cb8..896d874a5 100644 --- a/pontos/updateheader/updateheader.py +++ b/pontos/updateheader/updateheader.py @@ -76,6 +76,7 @@ "along with this program; if not, write to the Free Software", "Foundation, Inc\., 51 Franklin St, Fifth Floor, Boston, MA 02110\-1301 USA\.", # noqa: E501 ] +OLD_COMPANY = "Greenbone Networks GmbH" def _get_modified_year(f: Path) -> str: @@ -96,10 +97,10 @@ def _get_modified_year(f: Path) -> str: def _find_copyright( line: str, - regex: re.Pattern, + copyright_regex: re.Pattern, ) -> Tuple[bool, Union[Dict[str, Union[str, None]], None]]: - """Match the line for the regex""" - copyright_match = re.search(regex, line) + """Match the line for the copyright_regex""" + copyright_match = re.search(copyright_regex, line) if copyright_match: return ( True, @@ -151,22 +152,26 @@ def _remove_outdated( break i = i + 1 if changed: - return "\n".join(splitted_lines) + return "\n".join(splitted_lines) + "\n" return None def _update_file( file: Path, - regex: re.Pattern, + copyright_regex: re.Pattern, parsed_args: Namespace, term: Terminal, cleanup_regexes: Optional[List[re.Pattern]] = None, + old_company_copyright_regex: Optional[re.Pattern] = None, ) -> int: """Function to update the given file. Checks if header exists. If not it adds an header to that file, else it checks if year is up to date """ + cleanup = False + if cleanup_regexes and old_company_copyright_regex: + cleanup = True if parsed_args.changed: try: @@ -186,7 +191,13 @@ def _update_file( if line == "": i = 0 continue - found, copyright_match = _find_copyright(line=line, regex=regex) + found, copyright_match = _find_copyright( + line=line, copyright_regex=copyright_regex + ) + if cleanup and not found: + found, copyright_match = _find_copyright( + line=line, copyright_regex=old_company_copyright_regex # type: ignore # noqa: E501 + ) i = i - 1 # header not found, add header if i == 0 and not found: @@ -201,9 +212,7 @@ def _update_file( fp.seek(0) # back to beginning of file rest_of_file = fp.read() fp.seek(0) - fp.write(header) - fp.write("\n") - fp.write(rest_of_file) + fp.write(header + "\n" + rest_of_file) print(f"{file}: Added license header.") return 0 except ValueError: @@ -238,7 +247,7 @@ def _update_file( f'{copyright_match["creation_year"]}' f'-{parsed_args.year} {copyright_match["company"]}' ) - new_line = re.sub(regex, copyright_term, line) + new_line = re.sub(copyright_regex, copyright_term, line) fp_write = fp.tell() - len(line) # save position to insert rest_of_file = fp.read() fp.seek(fp_write) @@ -405,6 +414,14 @@ def _compile_outdated_regex() -> List[re.Pattern]: return regexes +def _compile_copyright_regex(company: str) -> re.Pattern: + """prepare the copyright regex""" + c_str = r"(SPDX-FileCopyrightText:|[Cc]opyright)" + d_str = r"(19[0-9]{2}|20[0-9]{2})" + + return re.compile(rf"{c_str}.*? {d_str}?-? ?{d_str}? ({company})") + + def main() -> None: parsed_args = _parse_args() exclude_list = [] @@ -443,10 +460,13 @@ def main() -> None: term.error("Specify files to update!") sys.exit(1) - regex: re.Pattern = re.compile( - "(SPDX-FileCopyrightText:|[Cc]opyright).*?(19[0-9]{2}|20[0-9]{2}) " - f"?-? ?(19[0-9]{{2}}|20[0-9]{{2}})? ({parsed_args.company})" + copyright_regex: re.Pattern = _compile_copyright_regex( + company=parsed_args.company + ) + old_company_copyright_regex: re.Pattern = _compile_copyright_regex( + company=OLD_COMPANY ) + cleanup_regexes: Optional[List[re.Pattern]] = None if parsed_args.cleanup: cleanup_regexes = _compile_outdated_regex() @@ -458,10 +478,11 @@ def main() -> None: else: _update_file( file=file, - regex=regex, + copyright_regex=copyright_regex, parsed_args=parsed_args, term=term, cleanup_regexes=cleanup_regexes, + old_company_copyright_regex=old_company_copyright_regex, ) except (FileNotFoundError, UnicodeDecodeError, ValueError): continue diff --git a/tests/updateheader/test_header.py b/tests/updateheader/test_header.py index fa6d26cb9..398245780 100644 --- a/tests/updateheader/test_header.py +++ b/tests/updateheader/test_header.py @@ -107,7 +107,9 @@ def test_find_copyright(self): ) # Full match - found, match = find_copyright(regex=self.regex, line=test_line) + found, match = find_copyright( + copyright_regex=self.regex, line=test_line + ) self.assertTrue(found) self.assertIsNotNone(match) self.assertEqual(match["creation_year"], "1995") @@ -115,7 +117,9 @@ def test_find_copyright(self): self.assertEqual(match["company"], self.args.company) # No modification Date - found, match = find_copyright(regex=self.regex, line=test2_line) + found, match = find_copyright( + copyright_regex=self.regex, line=test2_line + ) self.assertTrue(found) self.assertIsNotNone(match) self.assertEqual(match["creation_year"], "1995") @@ -123,7 +127,9 @@ def test_find_copyright(self): self.assertEqual(match["company"], self.args.company) # No match - found, match = find_copyright(regex=self.regex, line=invalid_line) + found, match = find_copyright( + copyright_regex=self.regex, line=invalid_line + ) self.assertFalse(found) self.assertIsNone(match) @@ -136,7 +142,9 @@ def test_find_spdx_copyright(self): ) # Full match - found, match = find_copyright(regex=self.regex, line=test_line) + found, match = find_copyright( + copyright_regex=self.regex, line=test_line + ) self.assertTrue(found) self.assertIsNotNone(match) self.assertEqual(match["creation_year"], "1995") @@ -144,7 +152,9 @@ def test_find_spdx_copyright(self): self.assertEqual(match["company"], self.args.company) # No modification Date - found, match = find_copyright(regex=self.regex, line=test2_line) + found, match = find_copyright( + copyright_regex=self.regex, line=test2_line + ) self.assertTrue(found) self.assertIsNotNone(match) self.assertEqual(match["creation_year"], "1995") @@ -152,7 +162,9 @@ def test_find_spdx_copyright(self): self.assertEqual(match["company"], self.args.company) # No match - found, match = find_copyright(regex=self.regex, line=invalid_line) + found, match = find_copyright( + copyright_regex=self.regex, line=invalid_line + ) self.assertFalse(found) self.assertIsNone(match) @@ -201,7 +213,7 @@ def test_update_file_not_existing(self, mock_stdout): with self.assertRaises(FileNotFoundError): update_file( file=test_file, - regex=self.regex, + copyright_regex=self.regex, parsed_args=self.args, term=term, ) @@ -224,7 +236,10 @@ def test_update_file_wrong_license(self, mock_stdout): test_file.touch() code = update_file( - file=test_file, regex=self.regex, parsed_args=self.args, term=term + file=test_file, + copyright_regex=self.regex, + parsed_args=self.args, + term=term, ) self.assertEqual(code, 1) @@ -249,7 +264,10 @@ def test_update_file_suffix_invalid(self, mock_stdout): test_file.touch() code = update_file( - file=test_file, regex=self.regex, parsed_args=self.args, term=term + file=test_file, + copyright_regex=self.regex, + parsed_args=self.args, + term=term, ) self.assertEqual(code, 1) @@ -281,7 +299,7 @@ def test_update_file_binary_file(self, mock_stdout): with self.assertRaises(UnicodeDecodeError): code = update_file( file=test_file, - regex=self.regex, + copyright_regex=self.regex, parsed_args=self.args, term=term, ) @@ -310,7 +328,7 @@ def test_update_file_changed(self, mock_stdout): with self.assertRaises(FileNotFoundError): code = update_file( file=test_file, - regex=self.regex, + copyright_regex=self.regex, parsed_args=self.args, term=term, ) @@ -338,7 +356,10 @@ def test_update_create_header(self, mock_stdout): test_file.touch() code = update_file( - file=test_file, regex=self.regex, parsed_args=self.args, term=term + file=test_file, + copyright_regex=self.regex, + parsed_args=self.args, + term=term, ) ret = mock_stdout.getvalue() @@ -367,7 +388,10 @@ def test_update_header_in_file(self, mock_stdout): test_file.write_text(header, encoding="utf-8") code = update_file( - file=test_file, regex=self.regex, parsed_args=self.args, term=term + file=test_file, + copyright_regex=self.regex, + parsed_args=self.args, + term=term, ) self.assertEqual(code, 0) @@ -401,7 +425,10 @@ def test_update_header_ok_in_file(self, mock_stdout): test_file.write_text(header, encoding="utf-8") code = update_file( - file=test_file, regex=self.regex, parsed_args=self.args, term=term + file=test_file, + copyright_regex=self.regex, + parsed_args=self.args, + term=term, ) self.assertEqual(code, 0) @@ -523,7 +550,7 @@ def test_remove_outdated(self): new_content = remove_outdated( content=test_content, cleanup_regexes=compiled_regexes ) - self.assertEqual(new_content, "") + self.assertEqual(new_content, "\n") def test_remove_outdated2(self): test_content = """the Free Software Foundation, either version 3 of the License, or @@ -542,4 +569,4 @@ def test_remove_outdated2(self): new_content = remove_outdated( content=test_content, cleanup_regexes=compiled_regexes ) - self.assertEqual(new_content, "") + self.assertEqual(new_content, "\n") From 35ce990828f6d57b795e18ecd7cd87eda1e8fbb0 Mon Sep 17 00:00:00 2001 From: Jaspar Stach Date: Wed, 15 Nov 2023 10:14:21 +0100 Subject: [PATCH 2/5] WiP --- pontos/updateheader/updateheader.py | 30 +++++++++++++++++------------ tests/updateheader/test_header.py | 12 +++++++----- 2 files changed, 25 insertions(+), 17 deletions(-) diff --git a/pontos/updateheader/updateheader.py b/pontos/updateheader/updateheader.py index 896d874a5..e8a1f54b1 100644 --- a/pontos/updateheader/updateheader.py +++ b/pontos/updateheader/updateheader.py @@ -72,6 +72,7 @@ "GNU Affero General Public License for more details.", "GNU General Public License for more details.", "You should have received a copy of the GNU Affero General Public License", + "You should have received a copy of the GNU General Public License", "along with this program. If not, see .", "along with this program; if not, write to the Free Software", "Foundation, Inc\., 51 Franklin St, Fifth Floor, Boston, MA 02110\-1301 USA\.", # noqa: E501 @@ -136,7 +137,7 @@ def _add_header( raise ValueError -def _remove_outdated( +def _remove_outdated_lines( content: str, cleanup_regexes: List[re.Pattern] ) -> Optional[str]: """Remove lines that contain outdated copyright header ...""" @@ -144,6 +145,10 @@ def _remove_outdated( splitted_lines = content.splitlines() i = 0 for line in splitted_lines[:20]: + if i > 3 and re.match(r"^(([#*]|//) ?)", line): + splitted_lines.pop(i) + i = i - 1 + continue for regex in cleanup_regexes: if regex.match(line): changed = True @@ -152,7 +157,8 @@ def _remove_outdated( break i = i + 1 if changed: - return "\n".join(splitted_lines) + "\n" + new_content = "\n".join(splitted_lines) + "\n" + return new_content return None @@ -226,15 +232,6 @@ def _update_file( "is not existing." ) return 1 - # old header existing - cleanup? - if cleanup_regexes: - old_content = file.read_text(encoding="utf-8") - new_content = _remove_outdated( - content=old_content, cleanup_regexes=cleanup_regexes - ) - if new_content: - file.write_text(new_content, encoding="utf-8") - print(f"{file}: Cleaned up!") # replace found header and write it to file if copyright_match and ( not copyright_match["modification_year"] @@ -265,13 +262,22 @@ def _update_file( else: print(f"{file}: License Header is ok.") - return 0 except FileNotFoundError as e: print(f"{file}: File is not existing.") raise e except UnicodeDecodeError as e: print(f"{file}: Ignoring binary file.") raise e + # old header existing - cleanup? + if cleanup_regexes: + old_content = file.read_text(encoding="utf-8") + new_content = _remove_outdated_lines( + content=old_content, cleanup_regexes=cleanup_regexes + ) + if new_content: + file.write_text(new_content, encoding="utf-8") + print(f"{file}: Cleaned up!") + return 0 def _get_exclude_list( diff --git a/tests/updateheader/test_header.py b/tests/updateheader/test_header.py index 398245780..b1089d7fb 100644 --- a/tests/updateheader/test_header.py +++ b/tests/updateheader/test_header.py @@ -39,7 +39,9 @@ _get_modified_year as get_modified_year, ) from pontos.updateheader.updateheader import _parse_args as parse_args -from pontos.updateheader.updateheader import _remove_outdated as remove_outdated +from pontos.updateheader.updateheader import ( + _remove_outdated_lines as remove_outdated_lines, +) from pontos.updateheader.updateheader import _update_file as update_file from pontos.updateheader.updateheader import main @@ -533,7 +535,7 @@ def test_main_never_happen(self, argparser_mock, mock_stdout): ret, ) - def test_remove_outdated(self): + def test_remove_outdated_lines(self): test_content = """* This program is free software: you can redistribute it and/or modify *it under the terms of the GNU Affero General Public License as // published by the Free Software Foundation, either version 3 of the @@ -547,12 +549,12 @@ def test_remove_outdated(self): compiled_regexes = compile_outdated_regex() - new_content = remove_outdated( + new_content = remove_outdated_lines( content=test_content, cleanup_regexes=compiled_regexes ) self.assertEqual(new_content, "\n") - def test_remove_outdated2(self): + def test_remove_outdated_lines2(self): test_content = """the Free Software Foundation, either version 3 of the License, or (at your option) any later version. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the @@ -566,7 +568,7 @@ def test_remove_outdated2(self): compiled_regexes = compile_outdated_regex() - new_content = remove_outdated( + new_content = remove_outdated_lines( content=test_content, cleanup_regexes=compiled_regexes ) self.assertEqual(new_content, "\n") From 942b93c45334fbcb727d05826d36e48b30b4385d Mon Sep 17 00:00:00 2001 From: Jaspar Stach Date: Wed, 20 Dec 2023 14:09:03 +0100 Subject: [PATCH 3/5] Finally fixed all issues, so testcase works, this should finally cleanup as expected --- pontos/updateheader/updateheader.py | 26 ++++------ tests/updateheader/test_header.py | 73 ++++++++++++++++++++++++++--- 2 files changed, 75 insertions(+), 24 deletions(-) diff --git a/pontos/updateheader/updateheader.py b/pontos/updateheader/updateheader.py index e8a1f54b1..95343bb2a 100644 --- a/pontos/updateheader/updateheader.py +++ b/pontos/updateheader/updateheader.py @@ -145,12 +145,12 @@ def _remove_outdated_lines( splitted_lines = content.splitlines() i = 0 for line in splitted_lines[:20]: - if i > 3 and re.match(r"^(([#*]|//) ?)", line): + if i > 3 and re.match(r"^(([#*]|//) ?$)", line): splitted_lines.pop(i) - i = i - 1 continue for regex in cleanup_regexes: if regex.match(line): + ("match") changed = True splitted_lines.pop(i) i = i - 1 @@ -168,16 +168,12 @@ def _update_file( parsed_args: Namespace, term: Terminal, cleanup_regexes: Optional[List[re.Pattern]] = None, - old_company_copyright_regex: Optional[re.Pattern] = None, ) -> int: """Function to update the given file. Checks if header exists. If not it adds an header to that file, else it checks if year is up to date """ - cleanup = False - if cleanup_regexes and old_company_copyright_regex: - cleanup = True if parsed_args.changed: try: @@ -200,10 +196,6 @@ def _update_file( found, copyright_match = _find_copyright( line=line, copyright_regex=copyright_regex ) - if cleanup and not found: - found, copyright_match = _find_copyright( - line=line, copyright_regex=old_company_copyright_regex # type: ignore # noqa: E501 - ) i = i - 1 # header not found, add header if i == 0 and not found: @@ -242,7 +234,7 @@ def _update_file( copyright_term = ( f"SPDX-FileCopyrightText: " f'{copyright_match["creation_year"]}' - f'-{parsed_args.year} {copyright_match["company"]}' + f"-{parsed_args.year} {parsed_args.company}" ) new_line = re.sub(copyright_regex, copyright_term, line) fp_write = fp.tell() - len(line) # save position to insert @@ -420,12 +412,14 @@ def _compile_outdated_regex() -> List[re.Pattern]: return regexes -def _compile_copyright_regex(company: str) -> re.Pattern: +def _compile_copyright_regex(company: Union[str, List[str]]) -> re.Pattern: """prepare the copyright regex""" c_str = r"(SPDX-FileCopyrightText:|[Cc]opyright)" d_str = r"(19[0-9]{2}|20[0-9]{2})" - return re.compile(rf"{c_str}.*? {d_str}?-? ?{d_str}? ({company})") + if isinstance(company, str): + return re.compile(rf"{c_str}.*? {d_str}?-? ?{d_str}? ({company})") + return re.compile(rf"{c_str}.*? {d_str}?-? ?{d_str}? ({'|'.join(company)})") def main() -> None: @@ -467,10 +461,7 @@ def main() -> None: sys.exit(1) copyright_regex: re.Pattern = _compile_copyright_regex( - company=parsed_args.company - ) - old_company_copyright_regex: re.Pattern = _compile_copyright_regex( - company=OLD_COMPANY + company=[parsed_args.company, OLD_COMPANY] ) cleanup_regexes: Optional[List[re.Pattern]] = None @@ -488,7 +479,6 @@ def main() -> None: parsed_args=parsed_args, term=term, cleanup_regexes=cleanup_regexes, - old_company_copyright_regex=old_company_copyright_regex, ) except (FileNotFoundError, UnicodeDecodeError, ValueError): continue diff --git a/tests/updateheader/test_header.py b/tests/updateheader/test_header.py index b1089d7fb..f481aa397 100644 --- a/tests/updateheader/test_header.py +++ b/tests/updateheader/test_header.py @@ -27,7 +27,10 @@ from unittest.mock import patch from pontos.terminal.terminal import ConsoleTerminal +from pontos.testing import temp_file +from pontos.updateheader.updateheader import OLD_COMPANY from pontos.updateheader.updateheader import _add_header as add_header +from pontos.updateheader.updateheader import _compile_copyright_regex from pontos.updateheader.updateheader import ( _compile_outdated_regex as compile_outdated_regex, ) @@ -535,6 +538,11 @@ def test_main_never_happen(self, argparser_mock, mock_stdout): ret, ) + +class UpdateHeaderCleanupTestCase(TestCase): + def setUp(self) -> None: + self.compiled_regexes = compile_outdated_regex() + def test_remove_outdated_lines(self): test_content = """* This program is free software: you can redistribute it and/or modify *it under the terms of the GNU Affero General Public License as @@ -547,10 +555,8 @@ def test_remove_outdated_lines(self): # version 2 as published by the Free Software Foundation. This program is free software: you can redistribute it and/or modify""" # noqa: E501 - compiled_regexes = compile_outdated_regex() - new_content = remove_outdated_lines( - content=test_content, cleanup_regexes=compiled_regexes + content=test_content, cleanup_regexes=self.compiled_regexes ) self.assertEqual(new_content, "\n") @@ -566,9 +572,64 @@ def test_remove_outdated_lines2(self): along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.""" # noqa: E501 - compiled_regexes = compile_outdated_regex() - new_content = remove_outdated_lines( - content=test_content, cleanup_regexes=compiled_regexes + content=test_content, cleanup_regexes=self.compiled_regexes ) self.assertEqual(new_content, "\n") + + def test_cleanup_file(self): + test_content = """# Copyright (C) 2021-2022 Greenbone Networks GmbH +# +# SPDX-License-Identifier: GPL-3.0-or-later +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +import foo +import bar + +foo.baz(bar.boing) +""" # noqa: E501 + + expected_content = f"""# SPDX-FileCopyrightText: 2021-{str(datetime.datetime.now().year)} Greenbone AG +# +# SPDX-License-Identifier: GPL-3.0-or-later +# + +import foo +import bar + +foo.baz(bar.boing) +""" # noqa: E501 + + with temp_file(content=test_content, name="foo.py") as tmp: + args = Namespace() + args.company = "Greenbone AG" + args.year = str(datetime.datetime.now().year) + args.changed = False + args.license_id = "GPL-3.0-or-later" + args.verbose = 0 + args.cleanup = True + + update_file( + file=tmp, + copyright_regex=_compile_copyright_regex( + ["Greenbone AG", OLD_COMPANY] + ), + parsed_args=args, + term=Terminal(), + cleanup_regexes=self.compiled_regexes, + ) + + new_content = tmp.read_text(encoding="utf-8") + self.assertEqual(expected_content, new_content) From e3f9aa0cd2fc46486deac9388e2d176d86c4855b Mon Sep 17 00:00:00 2001 From: Jaspar Stach Date: Thu, 21 Dec 2023 11:28:06 +0100 Subject: [PATCH 4/5] Fix ruff --- tests/updateheader/test_header.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/updateheader/test_header.py b/tests/updateheader/test_header.py index f481aa397..e2fc17d23 100644 --- a/tests/updateheader/test_header.py +++ b/tests/updateheader/test_header.py @@ -28,9 +28,12 @@ from pontos.terminal.terminal import ConsoleTerminal from pontos.testing import temp_file -from pontos.updateheader.updateheader import OLD_COMPANY +from pontos.updateheader.updateheader import ( + OLD_COMPANY, + _compile_copyright_regex, + main, +) from pontos.updateheader.updateheader import _add_header as add_header -from pontos.updateheader.updateheader import _compile_copyright_regex from pontos.updateheader.updateheader import ( _compile_outdated_regex as compile_outdated_regex, ) @@ -46,7 +49,6 @@ _remove_outdated_lines as remove_outdated_lines, ) from pontos.updateheader.updateheader import _update_file as update_file -from pontos.updateheader.updateheader import main HEADER = """# SPDX-FileCopyrightText: {date} Greenbone AG # From ba530961de6579b55cc491afc3eec4d017274e77 Mon Sep 17 00:00:00 2001 From: Jaspar S Date: Thu, 21 Dec 2023 11:31:26 +0100 Subject: [PATCH 5/5] Update pontos/updateheader/updateheader.py --- pontos/updateheader/updateheader.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pontos/updateheader/updateheader.py b/pontos/updateheader/updateheader.py index 95343bb2a..c827874b6 100644 --- a/pontos/updateheader/updateheader.py +++ b/pontos/updateheader/updateheader.py @@ -150,7 +150,6 @@ def _remove_outdated_lines( continue for regex in cleanup_regexes: if regex.match(line): - ("match") changed = True splitted_lines.pop(i) i = i - 1