New rules markup #518
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Reviews markup changes proposed by a pull request: one job builds a review
# report from the PR head, another from the PR base, and a third publishes the
# diff between the two reports as an artifact.
# General guide for Python workflows:
# https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions

name: Diff review for markup

# Runs only for pull requests that target the main branch.
on:
  pull_request:
    branches:
      - main

jobs:
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # | |
download_repos:
  # Builds the review report for the pull request HEAD commit and checks the
  # benchmark scores of the markup. The report is cached under a key derived
  # from the markup checksums; every step guarded by `cache-hit != 'true'`
  # is skipped when that cache entry is restored.
  runs-on: ubuntu-latest
  steps:
    - name: Checkout CredData
      uses: actions/checkout@v4
      with:
        ref: ${{ github.event.pull_request.head.sha }}

    # Collects md5 sums of snapshot.yaml, every file under meta/ and the
    # top-level *.py scripts; the resulting file feeds the cache key below.
    - name: Markup hashing
      run: |
        md5sum snapshot.yaml >head_checksums.md5
        for f in $(find meta -type f|sort); do md5sum $f; done >>head_checksums.md5
        for f in $(find . -maxdepth 1 -type f -name "*.py"|sort); do md5sum $f; done >>head_checksums.md5
        cat head_checksums.md5
        sha256sum head_checksums.md5

    - name: Cache head review
      id: cache-data
      uses: actions/cache@v4
      with:
        path: |
          review_head.txt
          review_head.html
        key: cred-data-${{ hashFiles('head_checksums.md5') }}

    # NOTE(review): `tmp` is presumably the working directory of
    # download_data.py - confirm. It is keyed by snapshot.yaml only.
    - name: Cache tmp
      if: steps.cache-data.outputs.cache-hit != 'true'
      id: cache-tmp
      uses: actions/cache@v4
      with:
        path: tmp
        key: cred-data-${{ hashFiles('snapshot.yaml') }}

    # apt-get with --yes never waits for a confirmation prompt, so the step
    # cannot abort in the non-interactive runner shell.
    - name: install ansi2html
      if: steps.cache-data.outputs.cache-hit != 'true'
      run: sudo apt-get update && sudo apt-get install --yes colorized-logs

    - name: Set up Python 3.10
      if: steps.cache-data.outputs.cache-hit != 'true'
      uses: actions/setup-python@v5
      with:
        python-version: "3.10"

    - name: Produce review report from HEAD
      if: steps.cache-data.outputs.cache-hit != 'true'
      run: |
        python -m pip install --upgrade pip
        python -m pip install --requirement requirements.txt
        # skip extra hints from git
        git config --global init.defaultBranch work
        python download_data.py --data_dir data --jobs $(nproc)
        # stdout and stderr both go into the report
        python review_data.py &>review_head.txt
        ansi2html --style 'pre {font-family: monospace; font-size: large}' <review_head.txt >review_head.html

    # diff exits non-zero when the fresh scores differ from the committed
    # .ci/benchmark.txt, which fails this step.
    - name: Produce benchmark scores from empty report to check markup only
      if: steps.cache-data.outputs.cache-hit != 'true'
      run: |
        python -m benchmark --scanner credsweeper --load .ci/empty_report.json >benchmark.txt
        diff --unified=3 --ignore-all-space --ignore-blank-lines .ci/benchmark.txt benchmark.txt

    # Runs even after a failure so partial results stay available.
    - name: Upload artifact
      if: always()
      uses: actions/upload-artifact@v4
      with:
        name: review_head
        path: |
          review_head.txt
          review_head.html
          benchmark.txt
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # | |
review_base:
  # Builds the review report for the pull request BASE commit. The report is
  # cached under a key derived from the markup checksums; every step guarded
  # by `cache-hit != 'true'` is skipped when that cache entry is restored.
  needs: [download_repos]
  runs-on: ubuntu-latest
  steps:
    - name: Checkout CredData
      uses: actions/checkout@v4
      with:
        ref: ${{ github.event.pull_request.base.sha }}

    # Collects md5 sums of snapshot.yaml, every file under meta/ and the
    # top-level *.py scripts; the resulting file feeds the cache key below.
    - name: Markup hashing
      run: |
        md5sum snapshot.yaml >base_checksums.md5
        for f in $(find meta -type f|sort); do md5sum $f; done >>base_checksums.md5
        for f in $(find . -maxdepth 1 -type f -name "*.py"|sort); do md5sum $f; done >>base_checksums.md5
        cat base_checksums.md5
        sha256sum base_checksums.md5

    - name: Cache base review
      id: cache-data
      uses: actions/cache@v4
      with:
        path: |
          review_base.txt
          review_base.html
        key: cred-data-${{ hashFiles('base_checksums.md5') }}

    # NOTE(review): `tmp` is presumably the working directory of
    # download_data.py - confirm. It is keyed by snapshot.yaml only.
    - name: Cache tmp
      if: steps.cache-data.outputs.cache-hit != 'true'
      id: cache-tmp
      uses: actions/cache@v4
      with:
        path: tmp
        key: cred-data-${{ hashFiles('snapshot.yaml') }}

    # apt-get with --yes never waits for a confirmation prompt, so the step
    # cannot abort in the non-interactive runner shell.
    - name: install ansi2html
      if: steps.cache-data.outputs.cache-hit != 'true'
      run: sudo apt-get update && sudo apt-get install --yes colorized-logs

    - name: Set up Python 3.10
      if: steps.cache-data.outputs.cache-hit != 'true'
      uses: actions/setup-python@v5
      with:
        python-version: "3.10"

    # Same pipeline as the HEAD job, so the two reports differ only by what
    # the pull request changed.
    - name: Produce review report from BASE
      if: steps.cache-data.outputs.cache-hit != 'true'
      run: |
        python -m pip install --upgrade pip
        python -m pip install --requirement requirements.txt
        # skip extra hints from git
        git config --global init.defaultBranch work
        python download_data.py --data_dir data --jobs $(nproc)
        # stdout and stderr both go into the report
        python review_data.py &>review_base.txt
        ansi2html --style 'pre {font-family: monospace; font-size: large}' <review_base.txt >review_base.html

    # Runs even after a failure so partial results stay available.
    - name: Upload artifact
      if: always()
      uses: actions/upload-artifact@v4
      with:
        name: review_base
        path: |
          review_base.txt
          review_base.html
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # | |
diff_review:
  # Compares the BASE and HEAD review reports produced by the two previous
  # jobs and publishes the difference as text and HTML.
  needs: [download_repos, review_base]
  runs-on: ubuntu-latest
  steps:
    # apt-get with --yes never waits for a confirmation prompt, so the step
    # cannot abort in the non-interactive runner shell.
    - name: install ansi2html
      run: sudo apt-get update && sudo apt-get install --yes colorized-logs

    # Without a `name` input every artifact of the run is downloaded into a
    # directory named after the artifact (review_base/, review_head/).
    - name: Download all workflow run artifacts
      uses: actions/download-artifact@v4

    - name: Get diff for review
      run: |
        # diff exits 0 when the reports are identical, 1 when they differ
        # and 2 on trouble (for example a missing report file)
        status=0
        diff --unified=1 --color=auto review_base/review_base.txt review_head/review_head.txt &>review_diff.txt || status=$?
        if [ "$status" -eq 1 ]; then
          # a difference is the expected outcome of a markup change
          ansi2html --style 'pre {font-family: monospace; font-size: large}' <review_diff.txt >review_diff.html
        elif [ "$status" -eq 0 ]; then
          touch review_diff.html
        else
          # do not publish a diff error message as if it were a review diff
          cat review_diff.txt >&2
          exit "$status"
        fi

    # Runs even after a failure so partial results stay available.
    - name: Upload artifact
      if: always()
      uses: actions/upload-artifact@v4
      with:
        name: review_diff
        path: |
          review_diff.txt
          review_diff.html
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # | |
markup_report_test:
  # Smoke test for markup_report.py: feeds it a one-item report and checks
  # that the script rewrites the file with markup information.
  runs-on: ubuntu-latest
  steps:
    - name: Checkout CredData
      uses: actions/checkout@v4
      with:
        ref: ${{ github.event.pull_request.head.sha }}

    - name: Set up Python 3.10
      uses: actions/setup-python@v5
      with:
        python-version: "3.10"

    - name: Report markup test
      run: |
        # The quoted delimiter keeps the fixture literal (no shell expansion).
        cat >report.json <<'EOF'
        [
          {
            "api_validation": "NOT_AVAILABLE",
            "ml_validation": "VALIDATED_KEY",
            "ml_probability": 0.5,
            "rule": "URL Credentials",
            "severity": "high",
            "confidence": "moderate",
            "line_data_list": [
              {
                "line": "1b17231a97529365248357043f038afb58be4699fc955cee78d5c8a2b037e89d",
                "line_num": 4256,
                "path": "data/00408ef6/src/eee97fd2.c",
                "info": "",
                "value": "5489cf96c232e0925f96692a65e3e7afe8e09430c280e71b808da17dee9791f8",
                "value_start": 16,
                "value_end": 24,
                "variable": "5b21d8adfc17cb2d231c2ec96498c8076c4a1d1e5373fa7d8b15a9e63d64d8f2",
                "variable_start": 5,
                "variable_end": 11,
                "entropy_validation": {
                  "iterator": "BASE64_CHARS",
                  "entropy": 3.0,
                  "valid": false
                }
              }
            ]
          }
        ]
        EOF
        # keep the untouched input for comparison and for the artifact
        cp report.json report.bak
        python markup_report.py report.json
        if diff report.bak report.json; then
          echo "Something went wrong. The files must be different"
          exit 1
        else
          # the substring from markup must appear in "api_validation"
          grep 'eee97fd2,GitHub,00408ef6' report.json
        fi

    # Runs even after a failure so both files stay available for inspection.
    - name: Upload artifact
      if: always()
      uses: actions/upload-artifact@v4
      with:
        name: reports
        path: |
          report.json
          report.bak
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # |