diff --git a/.github/workflows/run-formatter.yml b/.github/workflows/run-formatter.yml
index a7fd7bb8..e0133bc0 100644
--- a/.github/workflows/run-formatter.yml
+++ b/.github/workflows/run-formatter.yml
@@ -3,10 +3,10 @@ name: Run Black Formatter throughout the codebase
on:
push:
paths:
- - '**.py'
+ - "**.py"
pull_request:
paths:
- - '**.py'
+ - "**.py"
permissions:
contents: write # Allows auto-fix commits to be made
@@ -23,7 +23,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v5
with:
- python-version: '3.10'
+ python-version: "3.12"
- name: Install Black
run: pip install black
diff --git a/.github/workflows/run-linter.yml b/.github/workflows/run-linter.yml
index ea25ccca..3bf35c74 100644
--- a/.github/workflows/run-linter.yml
+++ b/.github/workflows/run-linter.yml
@@ -20,7 +20,7 @@ jobs:
- name: Install Python
uses: actions/setup-python@v5
with:
- python-version: '3.10'
+ python-version: '3.12'
- name: Install Ruff
run: pip install ruff
diff --git a/README.md b/README.md
index 99455b86..c88f4f11 100644
--- a/README.md
+++ b/README.md
@@ -90,7 +90,7 @@ python3 main.py -p MetaMask/metamask-extension -v v11.11.0 -s -pm yarn-berry
2. Differential analysis:
-```
+```bash
python3 main.py -p MetaMask/metamask-extension -v v11.11.0 -vn v11.12.0 -s -d -pm yarn-berry
```
diff --git a/tool/compare_commits.py b/tool/compare_commits.py
index 472a92e8..9a58dd7a 100644
--- a/tool/compare_commits.py
+++ b/tool/compare_commits.py
@@ -29,6 +29,7 @@ def tag_format(tag, package_name):
f"v.{tag}",
]
+ only_package_name, artifact_id_parts = None, None
if "/" in package_name: # NPM-based
only_package_name = package_name.split("/")[1]
elif ":" in package_name: # Maven based
diff --git a/tool/compare_packages.py b/tool/compare_packages.py
index ef99b045..130c40cc 100644
--- a/tool/compare_packages.py
+++ b/tool/compare_packages.py
@@ -181,6 +181,15 @@ def get_repo_from_SA(dep_file_1, dep_file_2, SA_old, SA_new):
if versions["message"] == "Upgraded package":
differences[dep]["category"] = "Upgraded package"
+ # Check code signature changes
+ signature_changes = compare_code_signatures(
+ dep, versions["chosen_v1"], versions["chosen_v2"], SA_old, SA_new
+ )
+ differences[dep]["signature_changes"] = signature_changes
+
+ # If there are signature changes, add it to the category
+ if signature_changes["has_changes"]:
+ differences[dep]["category"] = "Upgraded package with signature changes"
dep_name_version_old = f"{dep}@{versions['chosen_v1']}"
dep_name_version_new = f"{dep}@{versions['chosen_v2']}"
@@ -338,3 +347,29 @@ def differential(dep_file_1, dep_file_2, SA_1, SA_2):
upgraded_pkg,
no_change_pkg,
)
+
+
+def compare_code_signatures(pkg_name, old_version, new_version, SA_1, SA_2):
+    """Compare the code-signature status of two versions of a package.
+
+    Args:
+        pkg_name: Package name; combined with a version as ``"<name>@<version>"``
+            to look the package up in the analysis mappings.
+        old_version: Version looked up in ``SA_1``.
+        new_version: Version looked up in ``SA_2``.
+        SA_1: Mapping of ``"<name>@<version>"`` to analysis results (old release).
+        SA_2: Mapping of ``"<name>@<version>"`` to analysis results (new release).
+
+    Returns:
+        dict with the keys ``old_signature_present``, ``new_signature_present``,
+        ``old_signature_valid``, ``new_signature_valid`` and ``has_changes``.
+        ``has_changes`` is True when signature presence or validity differs
+        between the two versions. Missing entries or fields count as False.
+    """
+
+    def signature_of(analysis, pkg_key):
+        # The package entry or its "code_signature" value may be missing *or*
+        # explicitly None (key present, nothing recorded). `.get(key, {})` only
+        # covers the missing case, so normalise both to an empty dict here.
+        entry = (analysis or {}).get(pkg_key) or {}
+        return entry.get("code_signature") or {}
+
+    old_signature = signature_of(SA_1, f"{pkg_name}@{old_version}")
+    new_signature = signature_of(SA_2, f"{pkg_name}@{new_version}")
+
+    changes = {
+        "old_signature_present": old_signature.get("signature_present", False),
+        "new_signature_present": new_signature.get("signature_present", False),
+        "old_signature_valid": old_signature.get("signature_valid", False),
+        "new_signature_valid": new_signature.get("signature_valid", False),
+    }
+    # Any difference in presence or validity counts as a signature change.
+    changes["has_changes"] = (
+        changes["old_signature_present"] != changes["new_signature_present"]
+        or changes["old_signature_valid"] != changes["new_signature_valid"]
+    )
+
+    return changes
diff --git a/tool/report_diff.py b/tool/report_diff.py
index ad4a4825..e44d547c 100644
--- a/tool/report_diff.py
+++ b/tool/report_diff.py
@@ -163,8 +163,12 @@ def generate_diff_report(data, project_repo_name, release_version_old, release_v
new_reviewer_commits = df_author_new_reviewer.shape[0]
both_new_commits = df_author_both_new.shape[0]
+ signature_changes = df_all[df_all["category"] == "Upgraded package with signature changes"]
+ signature_changes_number = signature_changes["package_name"].nunique()
+
counts = {
":heavy_exclamation_mark: Downgraded packages": downgraded_number,
+ ":lock: Packages with signature changes": signature_changes_number,
":alien: Commits made by both New Authors and Reviewers": both_new_commits,
":neutral_face: Commits made by New Authors": new_author_commits,
":see_no_evil: Commits approved by New Reviewers": new_reviewer_commits,
@@ -195,6 +199,22 @@ def generate_diff_report(data, project_repo_name, release_version_old, release_v
f.write("")
f.write("\n")
+ if signature_changes_number > 0:
+ f.write("\n")
+ f.write(
+ f"""
+
+ Packages with signature changes
+ """
+ )
+ f.write("\n\n\n")
+ selected_columns = ["package_name", "old_version", "new_version", "signature_changes"]
+ signature_changes_df = signature_changes[selected_columns]
+ f.write(signature_changes_df.to_markdown(index=False))
+ f.write("\n")
+ f.write(" ")
+ f.write("\n")
+
if both_new_commits > 0:
f.write("\n")
f.write(
diff --git a/tool/report_static.py b/tool/report_static.py
index beb44ad0..6d6eccee 100644
--- a/tool/report_static.py
+++ b/tool/report_static.py
@@ -15,6 +15,8 @@
"deprecated": ["yarn-classic", "yarn-berry", "pnpm", "npm"],
"forked_package": ["yarn-classic", "yarn-berry", "pnpm", "npm", "maven"],
"provenance": ["yarn-classic", "yarn-berry", "pnpm", "npm"],
+ "code_signature": ["yarn-classic", "yarn-berry", "pnpm", "npm", "maven"],
+ "invalid_code_signature": ["yarn-classic", "yarn-berry", "pnpm", "npm", "maven"],
}
@@ -46,6 +48,8 @@ def create_dataframe(data):
"deprecated_in_version": package_data.get("package_info", {}).get("deprecated_in_version"),
"provenance_in_version": package_data.get("package_info", {}).get("provenance_in_version"),
"all_deprecated": package_data.get("package_info", {}).get("all_deprecated", None),
+ "signature_present": package_data.get("code_signature").get("signature_present"),
+ "signature_valid": package_data.get("code_signature").get("signature_valid"),
"github_url": github_exists_data.get("github_url", "Could not find repo from package registry"),
"github_exists": github_exists_data.get("github_exists", None),
"github_redirected": github_exists_data.get("github_redirected", None),
@@ -114,6 +118,18 @@ def write_summary(df, project_name, release_version, package_manager, filename,
"provenance_in_version",
],
]
+ code_signature_df = df.loc[
+ df["signature_present"] == False,
+ [
+ "signature_present",
+ ],
+ ]
+ invalid_code_signature_df = df.loc[
+ (df["signature_present"] == True) & (df["signature_valid"] == False),
+ [
+ "signature_valid",
+ ],
+ ]
common_counts = {
"### Total packages in the supply chain": len(df),
@@ -125,6 +141,8 @@ def write_summary(df, project_name, release_version, package_manager, filename,
"release_tag_not_found": f":wrench: Packages with accessible source code repos but inaccessible GitHub tags(⚠️⚠️⚠️) {(release_tag_not_found_df.shape[0])}",
"deprecated": f":x: Packages that are deprecated(⚠️⚠️) {(df['deprecated_in_version'] == True).sum()}",
"forked_package": f":cactus: Packages that are forks(⚠️⚠️) {(df['is_fork'] == True).sum()}",
+ "code_signature": f":lock: Packages with no code signature(⚠️⚠️) {(code_signature_df.shape[0])}",
+ "invalid_code_signature": f":pencil: Packages with an existing but invalid code signature(⚠️⚠️) {((invalid_code_signature_df.shape[0]))}",
"provenance": f":black_square_button: Packages without provenance(⚠️) {(df['provenance_in_version'] == False).sum()}",
}
@@ -270,6 +288,38 @@ def write_summary(df, project_name, release_version, package_manager, filename,
else:
md_file.write("\nAll packages have provenance.\n")
+ if not code_signature_df.empty:
+ md_file.write(
+ f"""
+
+ List of packages without code signature({(code_signature_df.shape[0])})
+ """
+ )
+ md_file.write("\n\n\n")
+ markdown_text = code_signature_df.reset_index().to_markdown(index=False)
+ md_file.write(markdown_text)
+ md_file.write("\n \n")
+ elif package_manager not in SUPPORTED_SMELLS["code_signature"]:
+ md_file.write(f"\nThe package manager ({package_manager}) does not support checking for code signature.\n")
+ else:
+ md_file.write("\nAll packages have code signature.\n")
+
+ if not invalid_code_signature_df.empty:
+ md_file.write(
+ f"""
+
+ List of packages with an existing but invalid code signature({(invalid_code_signature_df.shape[0])})
+ """
+ )
+ md_file.write("\n\n\n")
+ markdown_text = invalid_code_signature_df.reset_index().to_markdown(index=False)
+ md_file.write(markdown_text)
+ md_file.write("\n \n")
+ elif package_manager not in SUPPORTED_SMELLS["code_signature"]:
+ md_file.write(f"\nThe package manager ({package_manager}) does not support checking for code signature.\n")
+ else:
+ md_file.write("\nAll packages have valid code signature.\n")
+
md_file.write("\n### Call to Action:\n")
md_file.write(
"""
diff --git a/tool/static_analysis.py b/tool/static_analysis.py
index 0f836388..4f9db8b2 100644
--- a/tool/static_analysis.py
+++ b/tool/static_analysis.py
@@ -6,6 +6,8 @@
from tqdm import tqdm
import requests
+import subprocess
+import re
import tool_config
from compare_commits import tag_format as construct_tag_format
@@ -101,6 +103,61 @@ def check_maven(package, package_version):
logging.error(f"Package manager {pm} not supported.")
+def check_code_signature(package_name, package_version, pm):
+    """Report whether a package version carries a code signature.
+
+    Maven artifacts are inspected by running the ``pgpverify-maven-plugin``
+    ``show`` goal and parsing its output. Packages from npm-based package
+    managers are looked up in the npm registry metadata.
+
+    Args:
+        package_name: Package identifier (used as the Maven artifact prefix or
+            the npm registry path).
+        package_version: Version of the package to check.
+        pm: Package manager: ``"maven"``, ``"yarn-berry"``, ``"yarn-classic"``,
+            ``"pnpm"`` or ``"npm"``.
+
+    Returns:
+        dict: ``{"signature_present": bool, "signature_valid": bool}``.
+        Whenever the check cannot be performed (unsupported package manager,
+        Maven cannot be started, registry request fails) the problem is logged
+        and both values are False, so callers always receive a dict.
+    """
+    # TODO: caching this somehow would be nice
+    # TODO: find a package where we can check this, because with spoon everything is fine
+    import shutil  # local import: only needed to locate the mvn executable
+
+    def not_signed():
+        # Fresh dict per call so callers can mutate the result safely.
+        return {"signature_present": False, "signature_valid": False}
+
+    def check_maven_signature(package_name, package_version):
+        mvn = shutil.which("mvn")
+        if mvn is None:
+            logging.error("Error checking Maven signature: mvn executable not found")
+            return not_signed()
+
+        # Pass an argument list instead of a shell string: names and versions
+        # come from dependency files and must never be interpreted by a shell.
+        command = [
+            mvn,
+            "org.simplify4u.plugins:pgpverify-maven-plugin:show",
+            f"-Dartifact={package_name}:{package_version}",
+        ]
+        try:
+            output = subprocess.run(command, capture_output=True, text=True)
+        except OSError as e:
+            logging.error("Error checking Maven signature: %s", e)
+            return not_signed()
+
+        # Regular expression to extract the status from the PGP signature section
+        pgp_signature_pattern = re.compile(r"PGP signature:\n(?:[ \t]*.+\n)*?[ \t]*status:\s*(\w+)", re.MULTILINE)
+        match = pgp_signature_pattern.search(output.stdout)
+        if match:
+            status = match.group(1).strip().lower()
+            return {"signature_present": True, "signature_valid": status == "valid"}
+
+        # No PGP signature section in the plugin output
+        return not_signed()
+
+    def check_npm_signature(package, package_version):
+        # NOTE: for future reference, NPM migrated from PGP signatures to ECDSA registry signatures
+        # PGP-based registry signatures were deprecated on April 25th, 2023
+        try:
+            response = requests.get(f"https://registry.npmjs.org/{package}", timeout=20)
+            response.raise_for_status()
+            data = response.json()
+        except (requests.RequestException, ValueError) as e:
+            # ValueError covers a body that is not valid JSON.
+            logging.error("Error checking NPM signature: %s", e)
+            return not_signed()
+
+        # `or {}` / `or []` also cover fields that are present but null.
+        version_info = (data.get("versions") or {}).get(package_version) or {}
+        signatures = (version_info.get("dist") or {}).get("signatures") or []
+        if not signatures:
+            return not_signed()
+
+        # "Valid" here only means structurally complete (keyid and sig both
+        # set); the signature is NOT cryptographically verified.
+        has_complete_signature = any(sig.get("keyid") and sig.get("sig") for sig in signatures)
+        return {"signature_present": True, "signature_valid": has_complete_signature}
+
+    if pm == "maven":
+        return check_maven_signature(package_name, package_version)
+    if pm in ("yarn-berry", "yarn-classic", "pnpm", "npm"):
+        return check_npm_signature(package_name, package_version)
+
+    # Unsupported package manager: log it and return the same shape as every
+    # other "could not check" path, so consumers calling `.get` do not crash.
+    logging.error("Package manager %s not supported.", pm)
+    return not_signed()
+
+
def api_constructor(package_name, repository):
repo_url = repository.replace("https://", "").replace("http://", "").replace("/issues", "")
@@ -438,6 +495,10 @@ def analyze_package_data(package, repo_url, pm, check_match=False):
package_info["provenance"] = package_infos.get("provenance_in_version")
package_info["package_info"] = package_infos
+ # Code signature checks
+ print(f"[INFO] Checking code signature for {package_name}...")
+ package_info["code_signature"] = check_code_signature(package_name, package_version, pm)
+
if "Could not find" in repo_url:
package_info["github_exists"] = {"github_url": "No_repo_info_found"}
elif "not github" in repo_url:
@@ -482,7 +543,7 @@ def get_static_data(folder, packages_data, pm, check_match=False):
with tqdm(total=len(packages_data), desc="Analyzing packages") as pbar:
for package, repo_urls in packages_data.items():
# print(f"Analyzing {package}")
- tqdm.write(f"{package}")
+ tqdm.write(f"[INFO] Currently analyzing {package}")
repo_url = repo_urls.get("github", "")
analyzed_data, error = analyze_package_data(package, repo_url, pm, check_match=check_match)
pbar.update(1)