diff --git a/.github/workflows/run-formatter.yml b/.github/workflows/run-formatter.yml index a7fd7bb8..e0133bc0 100644 --- a/.github/workflows/run-formatter.yml +++ b/.github/workflows/run-formatter.yml @@ -3,10 +3,10 @@ name: Run Black Formatter throughout the codebase on: push: paths: - - '**.py' + - "**.py" pull_request: paths: - - '**.py' + - "**.py" permissions: contents: write # Allows auto-fix commits to be made @@ -23,7 +23,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v5 with: - python-version: '3.10' + python-version: "3.12" - name: Install Black run: pip install black diff --git a/.github/workflows/run-linter.yml b/.github/workflows/run-linter.yml index ea25ccca..3bf35c74 100644 --- a/.github/workflows/run-linter.yml +++ b/.github/workflows/run-linter.yml @@ -20,7 +20,7 @@ jobs: - name: Install Python uses: actions/setup-python@v5 with: - python-version: '3.10' + python-version: '3.12' - name: Install Ruff run: pip install ruff diff --git a/README.md b/README.md index 99455b86..c88f4f11 100644 --- a/README.md +++ b/README.md @@ -90,7 +90,7 @@ python3 main.py -p MetaMask/metamask-extension -v v11.11.0 -s -pm yarn-berry 2. 
def compare_code_signatures(pkg_name, old_version, new_version, SA_1, SA_2):
    """Compare the recorded code-signature status of two versions of a package.

    Args:
        pkg_name: Package name without a version suffix.
        old_version: Version that is looked up in ``SA_1``.
        new_version: Version that is looked up in ``SA_2``.
        SA_1: Mapping of ``"<name>@<version>"`` to the analysis result of the
            old release.
        SA_2: Mapping of ``"<name>@<version>"`` to the analysis result of the
            new release.

    Returns:
        A dict with the keys ``old_signature_present``,
        ``new_signature_present``, ``old_signature_valid``,
        ``new_signature_valid`` and ``has_changes``. A status that is not
        recorded is reported as ``False``; ``has_changes`` is ``True`` when
        either the "present" or the "valid" status differs between versions.
    """

    def _recorded_signature(analysis, key):
        # ``or {}`` instead of a ``.get`` default: a default only covers a
        # missing key, while an entry (or its "code_signature") that is
        # present but None would make the next ``.get`` raise AttributeError.
        entry = analysis.get(key) or {}
        return entry.get("code_signature") or {}

    old_signature = _recorded_signature(SA_1, f"{pkg_name}@{old_version}")
    new_signature = _recorded_signature(SA_2, f"{pkg_name}@{new_version}")

    changes = {
        "old_signature_present": old_signature.get("signature_present", False),
        "new_signature_present": new_signature.get("signature_present", False),
        "old_signature_valid": old_signature.get("signature_valid", False),
        "new_signature_valid": new_signature.get("signature_valid", False),
    }

    # Any difference in presence or validity counts as a signature change.
    changes["has_changes"] = (
        changes["old_signature_present"] != changes["new_signature_present"]
        or changes["old_signature_valid"] != changes["new_signature_valid"]
    )
    return changes
+ Packages with signature changes + """ + ) + f.write("\n\n\n") + selected_columns = ["package_name", "old_version", "new_version", "signature_changes"] + signature_changes_df = signature_changes[selected_columns] + f.write(signature_changes_df.to_markdown(index=False)) + f.write("\n") + f.write("
") + f.write("\n") + if both_new_commits > 0: f.write("\n") f.write( diff --git a/tool/report_static.py b/tool/report_static.py index beb44ad0..6d6eccee 100644 --- a/tool/report_static.py +++ b/tool/report_static.py @@ -15,6 +15,8 @@ "deprecated": ["yarn-classic", "yarn-berry", "pnpm", "npm"], "forked_package": ["yarn-classic", "yarn-berry", "pnpm", "npm", "maven"], "provenance": ["yarn-classic", "yarn-berry", "pnpm", "npm"], + "code_signature": ["yarn-classic", "yarn-berry", "pnpm", "npm", "maven"], + "invalid_code_signature": ["yarn-classic", "yarn-berry", "pnpm", "npm", "maven"], } @@ -46,6 +48,8 @@ def create_dataframe(data): "deprecated_in_version": package_data.get("package_info", {}).get("deprecated_in_version"), "provenance_in_version": package_data.get("package_info", {}).get("provenance_in_version"), "all_deprecated": package_data.get("package_info", {}).get("all_deprecated", None), + "signature_present": package_data.get("code_signature").get("signature_present"), + "signature_valid": package_data.get("code_signature").get("signature_valid"), "github_url": github_exists_data.get("github_url", "Could not find repo from package registry"), "github_exists": github_exists_data.get("github_exists", None), "github_redirected": github_exists_data.get("github_redirected", None), @@ -114,6 +118,18 @@ def write_summary(df, project_name, release_version, package_manager, filename, "provenance_in_version", ], ] + code_signature_df = df.loc[ + df["signature_present"] == False, + [ + "signature_present", + ], + ] + invalid_code_signature_df = df.loc[ + (df["signature_present"] == True) & (df["signature_valid"] == False), + [ + "signature_valid", + ], + ] common_counts = { "### Total packages in the supply chain": len(df), @@ -125,6 +141,8 @@ def write_summary(df, project_name, release_version, package_manager, filename, "release_tag_not_found": f":wrench: Packages with accessible source code repos but inaccessible GitHub tags(⚠️⚠️⚠️) 
{(release_tag_not_found_df.shape[0])}", "deprecated": f":x: Packages that are deprecated(⚠️⚠️) {(df['deprecated_in_version'] == True).sum()}", "forked_package": f":cactus: Packages that are forks(⚠️⚠️) {(df['is_fork'] == True).sum()}", + "code_signature": f":lock: Packages with no code signature(⚠️⚠️) {(code_signature_df.shape[0])}", + "invalid_code_signature": f":pencil: Packages with an existing but invalid code signature(⚠️⚠️) {((invalid_code_signature_df.shape[0]))}", "provenance": f":black_square_button: Packages without provenance(⚠️) {(df['provenance_in_version'] == False).sum()}", } @@ -270,6 +288,38 @@ def write_summary(df, project_name, release_version, package_manager, filename, else: md_file.write("\nAll packages have provenance.\n") + if not code_signature_df.empty: + md_file.write( + f""" +
+ List of packages without code signature({(code_signature_df.shape[0])}) + """ + ) + md_file.write("\n\n\n") + markdown_text = code_signature_df.reset_index().to_markdown(index=False) + md_file.write(markdown_text) + md_file.write("\n
\n") + elif package_manager not in SUPPORTED_SMELLS["code_signature"]: + md_file.write(f"\nThe package manager ({package_manager}) does not support checking for code signature.\n") + else: + md_file.write("\nAll packages have code signature.\n") + + if not invalid_code_signature_df.empty: + md_file.write( + f""" +
+ List of packages with an existing but invalid code signature({(invalid_code_signature_df.shape[0])}) + """ + ) + md_file.write("\n\n\n") + markdown_text = invalid_code_signature_df.reset_index().to_markdown(index=False) + md_file.write(markdown_text) + md_file.write("\n
def check_code_signature(package_name, package_version, pm):
    """Report whether a package version carries a code signature.

    Args:
        package_name: Package identifier; it is interpolated as
            ``<package_name>:<package_version>`` for Maven and used as the
            registry path for the NPM-family package managers.
        package_version: Version of the package to inspect.
        pm: Package manager name. ``"maven"`` uses the pgpverify Maven plugin;
            ``"yarn-berry"``, ``"yarn-classic"``, ``"pnpm"`` and ``"npm"``
            query the NPM registry.

    Returns:
        ``{"signature_present": bool, "signature_valid": bool}``. Both values
        are ``False`` when no signature is found, when the lookup fails, or
        when ``pm`` is not supported (an error is logged in the latter cases),
        so callers can always call ``.get`` on the result.
    """
    import shutil  # local import: only the Maven path needs it

    # TODO: caching this somehow would be nice
    # TODO: find a package where we can check this, because with spoon everything is fine

    def no_signature():
        # A fresh dict per call, so callers may mutate the result safely.
        return {"signature_present": False, "signature_valid": False}

    def check_maven_signature(package_name, package_version):
        mvn = shutil.which("mvn")
        if mvn is None:
            logging.error("Error checking Maven signature: 'mvn' executable not found on PATH.")
            return no_signature()

        # Argument list without a shell: the package name and version come
        # from dependency files and must never be interpreted by a shell.
        command = [
            mvn,
            "org.simplify4u.plugins:pgpverify-maven-plugin:show",
            f"-Dartifact={package_name}:{package_version}",
        ]
        try:
            output = subprocess.run(command, capture_output=True, text=True, check=False)
        except OSError as e:
            logging.error("Error checking Maven signature: %s", e)
            return no_signature()

        # Regular expression to extract the status of the PGP signature section
        pgp_signature_pattern = re.compile(r"PGP signature:\n(?:[ \t]*.+\n)*?[ \t]*status:\s*(\w+)", re.MULTILINE)
        match = pgp_signature_pattern.search(output.stdout)
        if match is None:
            # No PGP signature section in the plugin output
            return no_signature()

        status = match.group(1).strip().lower()
        return {"signature_present": True, "signature_valid": status == "valid"}

    def check_npm_signature(package, package_version):
        # NOTE: for future reference, NPM migrated from PGP signatures to ECDSA registry signatures
        # PGP-based registry signatures were deprecated on April 25th, 2023
        try:
            response = requests.get(f"https://registry.npmjs.org/{package}", timeout=20)
            response.raise_for_status()
            data = response.json()
        except requests.RequestException as e:
            logging.error("Error checking NPM signature: %s", e)
            return no_signature()

        version_info = data.get("versions", {}).get(package_version, {})
        signatures = version_info.get("dist", {}).get("signatures", [])
        if not signatures:
            return no_signature()

        # NOTE: "valid" here means at least one entry carries both a key id
        # and a signature value; nothing is verified cryptographically.
        well_formed = any(sig.get("keyid") and sig.get("sig") for sig in signatures)
        return {"signature_present": True, "signature_valid": well_formed}

    if pm == "maven":
        return check_maven_signature(package_name, package_version)
    if pm in ("yarn-berry", "yarn-classic", "pnpm", "npm"):
        return check_npm_signature(package_name, package_version)

    # Unsupported package manager: log it and still hand back a dict, because
    # the result is stored and later read with ``.get``.
    logging.error("Package manager %s not supported.", pm)
    return no_signature()
Currently analyzing {package}") repo_url = repo_urls.get("github", "") analyzed_data, error = analyze_package_data(package, repo_url, pm, check_match=check_match) pbar.update(1)