Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add maven check for code signature #48

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/run-formatter.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@ name: Run Black Formatter throughout the codebase
on:
push:
paths:
- '**.py'
- "**.py"
pull_request:
paths:
- '**.py'
- "**.py"

permissions:
contents: write # Allows auto-fix commits to be made
Expand All @@ -23,7 +23,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.10'
python-version: "3.12"

- name: Install Black
run: pip install black
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/run-linter.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
- name: Install Python
uses: actions/setup-python@v5
with:
python-version: '3.10'
python-version: '3.12'

- name: Install Ruff
run: pip install ruff
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ python3 main.py -p MetaMask/metamask-extension -v v11.11.0 -s -pm yarn-berry

2. Differential analysis:

```
```bash
python3 main.py -p MetaMask/metamask-extension -v v11.11.0 -vn v11.12.0 -s -d -pm yarn-berry
```

Expand Down
1 change: 1 addition & 0 deletions tool/compare_commits.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ def tag_format(tag, package_name):
f"v.{tag}",
]

only_package_name, artifact_id_parts = None, None
if "/" in package_name: # NPM-based
only_package_name = package_name.split("/")[1]
elif ":" in package_name: # Maven based
Expand Down
35 changes: 35 additions & 0 deletions tool/compare_packages.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,15 @@ def get_repo_from_SA(dep_file_1, dep_file_2, SA_old, SA_new):

if versions["message"] == "Upgraded package":
differences[dep]["category"] = "Upgraded package"
# Check code signature changes
signature_changes = compare_code_signatures(
dep, versions["chosen_v1"], versions["chosen_v2"], SA_old, SA_new
)
differences[dep]["signature_changes"] = signature_changes

# If there are signature changes, add it to the category
if signature_changes["has_changes"]:
differences[dep]["category"] = "Upgraded package with signature changes"

dep_name_version_old = f"{dep}@{versions['chosen_v1']}"
dep_name_version_new = f"{dep}@{versions['chosen_v2']}"
Expand Down Expand Up @@ -338,3 +347,29 @@ def differential(dep_file_1, dep_file_2, SA_1, SA_2):
upgraded_pkg,
no_change_pkg,
)


def compare_code_signatures(pkg_name, old_version, new_version, SA_1, SA_2):
    """Compare code signatures between two versions of a package.

    Args:
        pkg_name: Package name without a version suffix.
        old_version: Version looked up in ``SA_1`` as ``"<pkg_name>@<old_version>"``.
        new_version: Version looked up in ``SA_2`` as ``"<pkg_name>@<new_version>"``.
        SA_1: Mapping of ``"<name>@<version>"`` to analysis data for the old release.
        SA_2: Mapping of ``"<name>@<version>"`` to analysis data for the new release.

    Returns:
        A dict with the keys ``old_signature_present``, ``new_signature_present``,
        ``old_signature_valid``, ``new_signature_valid`` (each defaulting to
        ``False`` when the information is missing) and ``has_changes``, which is
        ``True`` when presence or validity differs between the two versions.
    """

    def _signature_of(analysis, pkg_key):
        # A missing package entry, or an entry whose "code_signature" is absent
        # or None (no result was recorded), is treated as "no signature data"
        # instead of raising AttributeError on None.get(...).
        entry = analysis.get(pkg_key) or {}
        return entry.get("code_signature") or {}

    old_signature = _signature_of(SA_1, f"{pkg_name}@{old_version}")
    new_signature = _signature_of(SA_2, f"{pkg_name}@{new_version}")

    changes = {
        "old_signature_present": old_signature.get("signature_present", False),
        "new_signature_present": new_signature.get("signature_present", False),
        "old_signature_valid": old_signature.get("signature_valid", False),
        "new_signature_valid": new_signature.get("signature_valid", False),
        "has_changes": False,
    }

    # Any difference in presence or validity counts as a signature change.
    changes["has_changes"] = (
        changes["old_signature_present"] != changes["new_signature_present"]
        or changes["old_signature_valid"] != changes["new_signature_valid"]
    )

    return changes
20 changes: 20 additions & 0 deletions tool/report_diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,8 +163,12 @@ def generate_diff_report(data, project_repo_name, release_version_old, release_v
new_reviewer_commits = df_author_new_reviewer.shape[0]
both_new_commits = df_author_both_new.shape[0]

signature_changes = df_all[df_all["category"] == "Upgraded package with signature changes"]
signature_changes_number = signature_changes["package_name"].nunique()

counts = {
":heavy_exclamation_mark: Downgraded packages": downgraded_number,
":lock: Packages with signature changes": signature_changes_number,
":alien: Commits made by both New Authors and Reviewers": both_new_commits,
":neutral_face: Commits made by New Authors": new_author_commits,
":see_no_evil: Commits approved by New Reviewers": new_reviewer_commits,
Expand Down Expand Up @@ -195,6 +199,22 @@ def generate_diff_report(data, project_repo_name, release_version_old, release_v
f.write("</details>")
f.write("\n")

if signature_changes_number > 0:
f.write("\n")
f.write(
f"""
<details>
<summary>Packages with signature changes</summary>
"""
)
f.write("\n\n\n")
selected_columns = ["package_name", "old_version", "new_version", "signature_changes"]
signature_changes_df = signature_changes[selected_columns]
f.write(signature_changes_df.to_markdown(index=False))
f.write("\n")
f.write("</details>")
f.write("\n")

if both_new_commits > 0:
f.write("\n")
f.write(
Expand Down
50 changes: 50 additions & 0 deletions tool/report_static.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
"deprecated": ["yarn-classic", "yarn-berry", "pnpm", "npm"],
"forked_package": ["yarn-classic", "yarn-berry", "pnpm", "npm", "maven"],
"provenance": ["yarn-classic", "yarn-berry", "pnpm", "npm"],
"code_signature": ["yarn-classic", "yarn-berry", "pnpm", "npm", "maven"],
"invalid_code_signature": ["yarn-classic", "yarn-berry", "pnpm", "npm", "maven"],
}


Expand Down Expand Up @@ -46,6 +48,8 @@ def create_dataframe(data):
"deprecated_in_version": package_data.get("package_info", {}).get("deprecated_in_version"),
"provenance_in_version": package_data.get("package_info", {}).get("provenance_in_version"),
"all_deprecated": package_data.get("package_info", {}).get("all_deprecated", None),
"signature_present": package_data.get("code_signature").get("signature_present"),
"signature_valid": package_data.get("code_signature").get("signature_valid"),
"github_url": github_exists_data.get("github_url", "Could not find repo from package registry"),
"github_exists": github_exists_data.get("github_exists", None),
"github_redirected": github_exists_data.get("github_redirected", None),
Expand Down Expand Up @@ -114,6 +118,18 @@ def write_summary(df, project_name, release_version, package_manager, filename,
"provenance_in_version",
],
]
code_signature_df = df.loc[
df["signature_present"] == False,
[
"signature_present",
],
]
invalid_code_signature_df = df.loc[
(df["signature_present"] == True) & (df["signature_valid"] == False),
[
"signature_valid",
],
]

common_counts = {
"### Total packages in the supply chain": len(df),
Expand All @@ -125,6 +141,8 @@ def write_summary(df, project_name, release_version, package_manager, filename,
"release_tag_not_found": f":wrench: Packages with accessible source code repos but inaccessible GitHub tags(⚠️⚠️⚠️) {(release_tag_not_found_df.shape[0])}",
"deprecated": f":x: Packages that are deprecated(⚠️⚠️) {(df['deprecated_in_version'] == True).sum()}",
"forked_package": f":cactus: Packages that are forks(⚠️⚠️) {(df['is_fork'] == True).sum()}",
"code_signature": f":lock: Packages with no code signature(⚠️⚠️) {(code_signature_df.shape[0])}",
"invalid_code_signature": f":pencil: Packages with an existing but invalid code signature(⚠️⚠️) {((invalid_code_signature_df.shape[0]))}",
"provenance": f":black_square_button: Packages without provenance(⚠️) {(df['provenance_in_version'] == False).sum()}",
}

Expand Down Expand Up @@ -270,6 +288,38 @@ def write_summary(df, project_name, release_version, package_manager, filename,
else:
md_file.write("\nAll packages have provenance.\n")

if not code_signature_df.empty:
md_file.write(
f"""
<details>
<summary>List of packages without code signature({(code_signature_df.shape[0])})</summary>
"""
)
md_file.write("\n\n\n")
markdown_text = code_signature_df.reset_index().to_markdown(index=False)
md_file.write(markdown_text)
md_file.write("\n</details>\n")
elif package_manager not in SUPPORTED_SMELLS["code_signature"]:
md_file.write(f"\nThe package manager ({package_manager}) does not support checking for code signature.\n")
else:
md_file.write("\nAll packages have code signature.\n")

if not invalid_code_signature_df.empty:
md_file.write(
f"""
<details>
<summary>List of packages with an existing but invalid code signature({(invalid_code_signature_df.shape[0])})</summary>
"""
)
md_file.write("\n\n\n")
markdown_text = invalid_code_signature_df.reset_index().to_markdown(index=False)
md_file.write(markdown_text)
md_file.write("\n</details>\n")
elif package_manager not in SUPPORTED_SMELLS["code_signature"]:
md_file.write(f"\nThe package manager ({package_manager}) does not support checking for code signature.\n")
else:
md_file.write("\nAll packages have valid code signature.\n")

md_file.write("\n### Call to Action:\n")
md_file.write(
"""
Expand Down
63 changes: 62 additions & 1 deletion tool/static_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
from tqdm import tqdm

import requests
import subprocess
import re

import tool_config
from compare_commits import tag_format as construct_tag_format
Expand Down Expand Up @@ -101,6 +103,61 @@ def check_maven(package, package_version):
logging.error(f"Package manager {pm} not supported.")


def check_code_signature(package_name, package_version, pm):
    """Check whether a package version carries a code signature.

    Args:
        package_name: Package identifier. For Maven it is combined with the
            version into ``-Dartifact=<package_name>:<package_version>``; for
            the npm-family managers it is the registry path component.
        package_version: Version of the package to inspect.
        pm: Package manager name: ``"maven"``, ``"yarn-berry"``,
            ``"yarn-classic"``, ``"pnpm"`` or ``"npm"``.

    Returns:
        ``{"signature_present": bool, "signature_valid": bool}`` for a supported
        package manager. For any other ``pm`` an error is logged and ``None``
        is returned.
    """
    # TODO: caching this somehow would be nice
    # TODO: find a package where we can check this, because with spoon everything is fine

    def check_maven_signature(package_name, package_version):
        # Local import: shutil is only needed to locate the Maven executable.
        import shutil

        mvn_executable = shutil.which("mvn")
        if mvn_executable is None:
            logging.error("Error checking Maven signature: 'mvn' executable not found on PATH")
            return {"signature_present": False, "signature_valid": False}

        # Pass an argument list (no shell) so that the package name/version,
        # which come from dependency data, can never be interpreted as shell
        # syntax.
        command = [
            mvn_executable,
            "org.simplify4u.plugins:pgpverify-maven-plugin:show",
            f"-Dartifact={package_name}:{package_version}",
        ]

        try:
            output = subprocess.run(command, capture_output=True, text=True)
        except OSError as e:
            logging.error(f"Error checking Maven signature: {str(e)}")
            return {"signature_present": False, "signature_valid": False}

        # Regular expression to extract the status from the PGP signature section
        pgp_signature_pattern = re.compile(r"PGP signature:\n(?:[ \t]*.+\n)*?[ \t]*status:\s*(\w+)", re.MULTILINE)
        match = pgp_signature_pattern.search(output.stdout)
        if match:
            status = match.group(1).strip().lower()
            return {"signature_present": True, "signature_valid": status == "valid"}

        # No "PGP signature" section in the plugin output: report no signature
        return {"signature_present": False, "signature_valid": False}

    def check_npm_signature(package, package_version):
        # NOTE: for future reference, NPM migrated from PGP signatures to ECDSA registry signatures
        # PGP-based registry signatures were deprecated on April 25th, 2023
        try:
            response = requests.get(f"https://registry.npmjs.org/{package}", timeout=20)
            response.raise_for_status()

            data = response.json()
            version_info = data.get("versions", {}).get(package_version, {})

            # Check for signature in dist metadata
            dist_info = version_info.get("dist", {})
            signatures = dist_info.get("signatures", [])

            if signatures:
                # NOTE: "valid" here only means the entry has both a key id and
                # a signature value; nothing is verified cryptographically.
                valid_signatures = [
                    sig for sig in signatures if isinstance(sig, dict) and sig.get("keyid") and sig.get("sig")
                ]
                return {"signature_present": True, "signature_valid": len(valid_signatures) > 0}

            return {"signature_present": False, "signature_valid": False}

        except (requests.RequestException, ValueError) as e:
            # ValueError covers a response body that is not valid JSON.
            logging.error(f"Error checking NPM signature: {str(e)}")
            return {"signature_present": False, "signature_valid": False}

    if pm == "maven":
        return check_maven_signature(package_name, package_version)
    if pm in ("yarn-berry", "yarn-classic", "pnpm", "npm"):
        return check_npm_signature(package_name, package_version)

    # Unsupported package manager: log and return None explicitly so the
    # "no result" case is visible to readers and callers.
    logging.error(f"Package manager {pm} not supported.")
    return None


def api_constructor(package_name, repository):
repo_url = repository.replace("https://", "").replace("http://", "").replace("/issues", "")

Expand Down Expand Up @@ -438,6 +495,10 @@ def analyze_package_data(package, repo_url, pm, check_match=False):
package_info["provenance"] = package_infos.get("provenance_in_version")
package_info["package_info"] = package_infos

# Code signature checks
print(f"[INFO] Checking code signature for {package_name}...")
package_info["code_signature"] = check_code_signature(package_name, package_version, pm)

if "Could not find" in repo_url:
package_info["github_exists"] = {"github_url": "No_repo_info_found"}
elif "not github" in repo_url:
Expand Down Expand Up @@ -482,7 +543,7 @@ def get_static_data(folder, packages_data, pm, check_match=False):
with tqdm(total=len(packages_data), desc="Analyzing packages") as pbar:
for package, repo_urls in packages_data.items():
# print(f"Analyzing {package}")
tqdm.write(f"{package}")
tqdm.write(f"[INFO] Currently analyzing {package}")
repo_url = repo_urls.get("github", "")
analyzed_data, error = analyze_package_data(package, repo_url, pm, check_match=check_match)
pbar.update(1)
Expand Down