Skip to content

Commit

Permalink
Merge pull request #6313 from cx-ruiaraujo/Feature/Add_Query_Document…
Browse files Browse the repository at this point in the history
…ation_Generator

feat(documentation): add query page generator
  • Loading branch information
gabriel-cx authored May 9, 2023
2 parents ce0e794 + b5fe6cb commit a5c8d52
Show file tree
Hide file tree
Showing 14 changed files with 474 additions and 11 deletions.
2 changes: 2 additions & 0 deletions .github/scripts/docs-generator/docs-generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,8 @@ def check_and_create_override_entry(meta_dict, template_dict):

check_and_create_override_entry(meta_dict, template_dict)
q_id = meta_dict['id']
query_page = os.path.join('..', f"{meta_dict.get('platform').lower()}-queries", meta_dict.get('cloudProvider', '').lower(), meta_dict.get('id'))
meta_dict['descriptionText'] += f' (<a href="{query_page}" target="_blank">read more</a>)'
template_dict[platform][sub_platform][severity][category][q_id] = meta_dict
#
# template dict ex:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,273 @@
# Script created in Python 3.10.8 using only standard libraries
import argparse
import os
import json
import shutil
import time
from pathlib import Path

# Searches for "metadata.json" files within the input directory
# Returns a dictionary of dictionaries (output of function get_query_info)
def get_meta_data_and_tests(input_path : str, metadata_file = 'metadata.json'):
    """Index every query found below ``input_path`` by its id.

    The directory tree is walked recursively looking for files named
    ``metadata_file``; each hit is handed to ``get_query_info`` together
    with its parent directory. Entries whose metadata has no (or an empty)
    ``id`` are left out. When two queries share an id, the one visited last
    replaces the earlier one.

    Returns a dict mapping query id -> dict produced by ``get_query_info``.
    """
    collected = {}

    for metadata_path in Path(input_path).rglob(metadata_file):
        info = get_query_info(os.path.dirname(metadata_path), metadata_file)
        if identifier := info.get("id"):
            collected[identifier] = info

    return collected

# Creates a dictionary with the query information present in the "./metadata.json" file and the "./test" directory
def get_query_info(query_path : str, metadata_file = 'metadata.json', test_extensions = (".json", ".yaml", ".tf", ".dockerfile")):
    """Build the documentation record for a single query folder.

    Parameters:
        query_path: folder that holds ``metadata_file`` and a ``test`` sub-folder.
        metadata_file: name of the metadata JSON file inside ``query_path``.
        test_extensions: file extensions probed for ``positive*`` / ``negative*`` samples.

    Returns:
        The parsed metadata dict, extended with three keys:
        ``githubUrl`` (link to the query folder in the KICS repository),
        ``true_positives`` (list of ``{fileName, lines, code}`` dicts) and
        ``true_negatives`` (list of ``{fileName, code}`` dicts).

    Raises:
        FileNotFoundError: if the metadata file, the ``test`` folder or
            ``test/positive_expected_result.json`` is missing.
    """
    metadata_file_path = os.path.join(query_path, metadata_file)

    if not os.path.exists(metadata_file_path):
        raise FileNotFoundError(f"\033[31mFile {metadata_file} doesn't exist in {query_path}\033[0m")

    # Get information inside the "./metadata.json" file
    with open(metadata_file_path, 'r', encoding='utf-8') as f:
        query_metadata_content = json.load(f)

    # The URL needs forward slashes whatever the host OS is, so normalise the
    # path first and then keep only the part starting at "assets/queries".
    # Without this, an absolute input path would leak into the generated link.
    posix_metadata_path = Path(metadata_file_path).as_posix()
    folder_index = posix_metadata_path.rfind("assets/queries")
    if folder_index != -1:
        path_after_folder = posix_metadata_path[folder_index:]
    else:
        path_after_folder = posix_metadata_path
    query_metadata_content['githubUrl'] = f"https://github.com/Checkmarx/kics/tree/master/{os.path.dirname(path_after_folder)}"

    test_folder_path = os.path.join(query_path, 'test')
    if not os.path.isdir(test_folder_path):
        raise FileNotFoundError(f"\033[31mFolder {test_folder_path} doesn't exist in {query_path}\033[0m")

    expected_result_file_path = os.path.join(test_folder_path, 'positive_expected_result.json')
    if not os.path.exists(expected_result_file_path):
        raise FileNotFoundError(f"\033[31mFile {expected_result_file_path} doesn't exist in {test_folder_path}\033[0m")

    with open(expected_result_file_path, encoding='utf-8') as f:
        expected_results = json.load(f)

    true_positives = []
    true_negatives = []

    # Samples are named "positive.<ext>" / "negative.<ext>" (index 0) or
    # "positiveN.<ext>" / "negativeN.<ext>" (N >= 1). Index 0 is always
    # followed by index 1; after that the scan stops at the first index for
    # which no sample file of any extension exists.
    index = 0
    while True:
        suffix = "" if index == 0 else str(index)
        found_any = False

        for extension in test_extensions:
            # Positive tests
            positive_name = f"positive{suffix}{extension}"
            positive_file_path = os.path.join(test_folder_path, positive_name)
            if os.path.exists(positive_file_path):
                with open(positive_file_path, "r", encoding='utf-8') as f:
                    test_content = f.read()

                if index == 0:
                    # An un-numbered sample receives every expected result.
                    test_results = expected_results
                else:
                    test_results = [
                        item
                        for item in expected_results
                        if (item.get("fileName")
                            or item.get("filename")
                            or item.get("file")) == positive_name
                    ]
                # De-duplicate and sort so the generated pages are stable
                # from one run to the next.
                lines = sorted({item["line"] for item in test_results})
                true_positives.append(dict(fileName=positive_name, lines=lines, code=test_content))
                found_any = True

            # Negative tests
            negative_name = f"negative{suffix}{extension}"
            negative_file_path = os.path.join(test_folder_path, negative_name)
            if os.path.exists(negative_file_path):
                with open(negative_file_path, "r", encoding='utf-8') as f:
                    test_content = f.read()

                true_negatives.append(dict(fileName=negative_name, code=test_content))
                found_any = True

        if not found_any and index != 0:
            break
        index += 1

    query_metadata_content['true_positives'] = true_positives
    query_metadata_content['true_negatives'] = true_negatives
    return query_metadata_content

# Utility for generating ".md" documentation
def format_negative_tests(format_negative_tests : dict) -> str:
    """Render the negative (non-vulnerable) samples as Markdown code fences.

    The argument is iterated as a sequence of mappings, each providing
    ``fileName`` and ``code``. The file extension doubles as the fence
    language. The first three samples are emitted as titled fences; every
    further sample is wrapped in a collapsed ``<details>`` element.
    """
    chunks = []

    for position, sample in enumerate(format_negative_tests, start=1):
        file_type = sample.get('fileName').split(".")[-1]
        heading = f'Negative test num. {position} - {file_type} file'
        snippet = sample.get('code')

        # From the fourth sample on, the code is placed in a drop down
        if position > 3:
            chunks.append(f"<details><summary>{heading}</summary>\n\n")
            chunks.append(f'```{file_type}\n{snippet}\n```\n')
            chunks.append("</details>\n")
            continue

        chunks.append(f'```{file_type} title="{heading}"\n{snippet}\n```\n')

    return ''.join(chunks)

# Utility for generating ".md" documentation
def format_positive_tests(positive_tests : list) -> str:
    """Render the positive (vulnerable) samples as Markdown code fences.

    Parameters:
        positive_tests: sequence of mappings with ``fileName``, ``code`` and
            ``lines`` (the line numbers to highlight; may be empty or absent).

    Returns:
        Markdown text. The file extension doubles as the fence language and
        the flagged lines are passed through ``hl_lines``. The first three
        samples are emitted as titled fences; every further sample is wrapped
        in a collapsed ``<details>`` element.
    """
    chunks = []

    for idx, sample in enumerate(positive_tests):
        extension = sample.get('fileName').split(".")[-1]
        title = f'Positive test num. {idx + 1} - {extension} file'
        code = sample.get('code')

        # A sample without recorded lines simply gets no highlighting.
        flagged_lines = sample.get('lines') or []
        highlight = ''
        if flagged_lines:
            highlight = 'hl_lines="' + ' '.join(str(line) for line in flagged_lines) + '"'

        # If the query has more than 3 tests, the remaining tests are placed in a drop down
        if idx <= 2:
            chunks.append(f'```{extension} title="{title}" {highlight}\n{code}\n```\n')
        else:
            chunks.append(f"<details><summary>{title}</summary>\n\n")
            chunks.append(f'```{extension} {highlight}\n{code}\n```\n')
            chunks.append("</details>\n")

    return ''.join(chunks)

# Utility for generating ".md" documentation
def format_severity(severity : str) -> str:
    """Wrap a severity label in a ``<span>``, coloured when the level is known.

    The label is normalised with ``str.capitalize`` before the colour lookup,
    so ``"HIGH"`` and ``"high"`` both render as ``High``. A label that is not
    in the colour table is emitted in a plain ``<span>`` instead of producing
    the invalid CSS ``color:None``; ``None`` is treated as an empty label.
    """
    colors = {'High': '#C00', 'Medium': '#C60', 'Low': '#CC0', 'Info': '#00C', 'Trace': '#CCC'}
    severity = (severity or '').capitalize()
    color = colors.get(severity)
    if color is None:
        return f'<span>{severity}</span>'
    return f'<span style="color:{color}">{severity}</span>'

# Generates a ".md" file for each query
def generate_md_docs(queries_database : dict, output_path : str, template_file_path = 'template.md', delete_folders : bool = False):
    """Write one Markdown page per query, based on a template file.

    Parameters:
        queries_database: mapping of query id -> query information dict (as
            produced by ``get_meta_data_and_tests``).
        output_path: root folder for the generated pages. Pages are written to
            ``<output_path>/<platform>-queries/<cloudProvider>/<query id>.md``.
        template_file_path: Markdown template containing the ``<QUERY_ID>``,
            ``<QUERY_NAME>``, ``<PLATFORM>``, ``<SEVERITY>``, ``<CATEGORY>``,
            ``<GITHUB_URL>``, ``<DESCRIPTION_TEXT>``, ``<DESCRIPTION_URL>``,
            ``<POSITIVE_TESTS>`` and ``<NEGATIVE_TESTS>`` placeholders.
        delete_folders: when true, every ``<platform>-queries`` folder directly
            under ``output_path`` that matches a platform present in
            ``queries_database`` is removed before the pages are written.

    Raises:
        FileNotFoundError: if ``template_file_path`` does not exist.
    """
    # Validate and load the template first so that a bad template path cannot
    # leave the output folder wiped with nothing to replace it.
    if not os.path.exists(template_file_path):
        raise FileNotFoundError("\033[31mtemplate_path doesn't exist in the operating system\033[0m")

    with open(template_file_path, 'r', encoding='utf-8') as f:
        doc_template = f.read()

    # Ensure that we are deleting old files generated by this script
    if delete_folders and os.path.isdir(output_path):
        platforms = {f"{value.get('platform').lower()}-queries"
                     for value in queries_database.values()
                     if value.get("platform") is not None}

        for folder in os.listdir(output_path):
            folder_path = os.path.join(output_path, folder)
            if os.path.isdir(folder_path) and folder.lower() in platforms:
                shutil.rmtree(folder_path)  # Delete the folder and all its contents

    for key, query_data in queries_database.items():
        # Insertion order is the substitution order.
        replacements = {
            "<QUERY_ID>": key,
            "<QUERY_NAME>": query_data.get('queryName'),
            "<PLATFORM>": query_data.get('platform'),
            "<SEVERITY>": format_severity(query_data.get('severity')),
            "<CATEGORY>": query_data.get('category'),
            "<GITHUB_URL>": query_data.get('githubUrl'),
            "<DESCRIPTION_TEXT>": query_data.get('descriptionText'),
            "<DESCRIPTION_URL>": query_data.get('descriptionUrl'),
            "<POSITIVE_TESTS>": format_positive_tests(query_data.get('true_positives')),
            "<NEGATIVE_TESTS>": format_negative_tests(query_data.get('true_negatives')),
        }
        query_doc = doc_template
        for placeholder, value in replacements.items():
            query_doc = query_doc.replace(placeholder, value)

        platform_folder_path = os.path.join(output_path,
                                            f"{query_data.get('platform').lower()}-queries",
                                            (query_data.get('cloudProvider') or '').lower())
        os.makedirs(platform_folder_path, exist_ok=True)

        # Explicit UTF-8 keeps the rendered pages independent of the host locale.
        with open(f"{os.path.join(platform_folder_path, key)}.md", "w", encoding='utf-8') as f:
            f.write(query_doc)

# Export a dictionary to a "json" file
def export_to_json(queries_database : dict, output_path : str):
    """Dump ``queries_database`` to ``<output_path>/queries_database.json``.

    The file is (over)written with a 4-space indented JSON document.
    """
    destination = Path(output_path, "queries_database.json")
    with destination.open("w") as handle:
        json.dump(queries_database, handle, indent=4)

def main():
    """Command-line entry point.

    Parses the arguments, collects the information of every query found under
    ``-p`` and writes either a single JSON file (``-f json``) or one Markdown
    page per query (``-f md``, which additionally requires ``--t``) into
    ``-o``. The elapsed time is printed at the end.

    Raises:
        FileNotFoundError: if the input path or the template path is missing.
        SystemExit: raised by argparse on invalid arguments.
    """
    start_time = time.time()

    # Script arguments
    parser = argparse.ArgumentParser(description="Create/Update documentation page for each query")
    parser.add_argument('-p', type=Path, dest='input_path',
                        help='Folder path to read "metadata.json".', required=True)
    parser.add_argument('-o', type=Path, dest='output_path',
                        help='Folder path to output documentation files.', required=True)
    parser.add_argument('-f', type=str, dest='output_format', choices=['json', 'md'],
                        help='Documentation formats to be created, this script only supports "json" and "md".', required=True)
    parser.add_argument('--t', type=Path, dest='template_path',
                        help='Template file path.')
    parser.add_argument('--df', dest='delete_folders', action='store_true',
                        help='If specified, delete all folders in the specified output_path that match the platform names.')

    args = parser.parse_args()

    # Validating optional arguments: only the Markdown output needs a template
    output_format = args.output_format
    if output_format == 'md' and (args.template_path is None):
        parser.error("-f md requires --t")

    input_path = args.input_path
    if not input_path.exists():
        raise FileNotFoundError("\033[31minput_path doesn't exist in the operating system\033[0m")

    output_path = args.output_path

    # Get queries information
    queries_database = get_meta_data_and_tests(str(input_path))

    # The output of this script depends on the output_format
    if output_format == 'json':
        export_to_json(queries_database, str(output_path))
        print("-->\033[32m JSON file with all queries information created/updated successfully\033[0m")

    elif output_format == 'md':
        template_path = args.template_path
        if not template_path.exists():
            raise FileNotFoundError("\033[31mtemplate_path doesn't exist in the operating system\033[0m")

        generate_md_docs(queries_database, str(output_path), str(template_path), args.delete_folders)
        print("-->\033[32m Documentation .md pages for each query created/updated successfully\033[0m")

    end_time = time.time()
    elapsed_time = end_time - start_time
    print(f"-->\033[34m Elapsed time: {round(elapsed_time, 2)} seconds\033[0m")


if __name__ == "__main__":
    main()
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
---
title: <QUERY_NAME>
hide:
toc: true
navigation: true
---

<style>
.highlight .hll {
background-color: #ff171742;
}
.md-content {
max-width: 1100px;
margin: 0 auto;
}
</style>

- **Query id:** <QUERY_ID>
- **Query name:** <QUERY_NAME>
- **Platform:** <PLATFORM>
- **Severity:** <SEVERITY>
- **Category:** <CATEGORY>
- **URL:** [Github](<GITHUB_URL>)

### Description
<DESCRIPTION_TEXT><br>
[Documentation](<DESCRIPTION_URL>)

### Code samples
#### Code samples with security vulnerabilities
<POSITIVE_TESTS>

#### Code samples without security vulnerabilities
<NEGATIVE_TESTS>
8 changes: 7 additions & 1 deletion .github/workflows/update-docs-queries.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,17 @@ jobs:
- name: Update docs
run: |
pip3 install -r .github/scripts/docs-generator/requirements.txt
python3 -u .github/scripts/docs-generator/docs-generator.py \
python3 -u -B .github/scripts/docs-generator/docs-generator.py \
-p ./assets/queries/ \
-o ./docs/queries/ \
-f md \
-t .github/scripts/docs-generator/templates
python3 -u -B .github/scripts/docs-generator/query-page-generator/query-page-generator.py \
-p ./assets/queries/ \
-o ./docs/queries/ \
-f md \
--t .github/scripts/docs-generator/query-page-generator/templates/query-page-template.md \
--df
- name: Create Pull Request
uses: peter-evans/create-pull-request@v4
with:
Expand Down
6 changes: 6 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,12 @@ generate-queries-docs: ## generate queries catalog md files
-o ./docs/queries/ \
-f md \
-t .github/generators/templates
@python3 -u .github/scripts/docs-generator/query-page-generator/query-page-generator.py \
-p ./assets/queries/ \
-o ./docs/queries/ \
-f md \
--t .github/scripts/docs-generator/query-page-generator/templates/query-page-template.md \
--df
@echo "\033[36mQueries catalog updated\033[0m"

.PHONY: integration
Expand Down
2 changes: 2 additions & 0 deletions docs/js/custom.js
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,8 @@ function exportToCSV(filename) {
} else if (headerArray[j] == "query") {
var lastIndex = text.lastIndexOf(" ")
text = `"${text.substring(lastIndex + 1)},${text.substring(0, lastIndex)}"`
} else if (headerArray[j] == "description") {
text = text.replace(/\(read more\)/i, '')
}
row.push(text)
}
Expand Down
3 changes: 3 additions & 0 deletions mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,9 @@ google_analytics:
- auto

markdown_extensions:
- pymdownx.highlight:
anchor_linenums: true
- pymdownx.superfences
- toc:
permalink: true
toc_depth: 2
Expand Down
Loading

0 comments on commit a5c8d52

Please sign in to comment.