forked from MrWQ/cve_description
-
Notifications
You must be signed in to change notification settings - Fork 0
/
cve_description.py
88 lines (77 loc) · 2.44 KB
/
cve_description.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# -*- coding: utf-8 -*-
# @Time : 2022/7/6 13:46
# @Author : ordar
# @Project : cve_description
# @File : cve_description.py
# @Python: 3.7.5
import requests
import datetime
import re
import json
# Blacklist of link texts to ignore, guarding against polluted GitHub data.
# The file holds one entry per line; surrounding whitespace is stripped.
# NOTE(review): the file name is spelled "blick_list.txt" - confirm it matches
# the file on disk before renaming it.
with open("blick_list.txt", "r", encoding="utf8") as blacklist_file:
    black_list = [entry.strip() for entry in blacklist_file]
# Markdown index of CVE PoC/Exp repositories, plus a CDN URL for the same
# repository file that is used when the primary host fails.
search_url = "https://raw.githubusercontent.com/ycdxsb/PocOrExp_in_Github/main/PocOrExp.md"
cdn_search_url = "https://cdn.jsdelivr.net/gh/ycdxsb/PocOrExp_in_Github@main/PocOrExp.md"

# Download the index, trying each source in order. The timeout keeps the
# script from hanging on a stalled connection, and raise_for_status() stops an
# HTTP error page from being parsed as if it were the index.
fetch_error = None
for source_url in (search_url, cdn_search_url):
    try:
        resp = requests.get(source_url, timeout=60)
        resp.raise_for_status()
    except requests.RequestException as exc:
        fetch_error = exc
        continue
    break
else:
    # Every source failed: surface the last error instead of carrying on
    # with no usable document.
    raise fetch_error
pre_html = resp.text
# Normalise blank lines: collapse triple newlines into one, then make sure
# every "##" heading is preceded by an empty line.
pre_html = pre_html.replace("\n\n\n", "\n").replace("\n##", "\n\n##")

# Strip the per-year headings. Headings for 1999 up to last year are removed
# together with their leading newline; the current year's heading is removed
# without it.
today_year = datetime.datetime.now().year
for year in range(1999, today_year):
    pre_html = pre_html.replace(f"\n## {year}\n", "")
pre_html = pre_html.replace(f"## {today_year}\n", "")

# Drop the newline in front of every "-" so list items are joined onto the
# preceding line.
html = pre_html.replace("\n-", "-")
# Matches a CVE heading line together with the description line after it.
_CVE_HEADING_AND_DESCRIPTION = re.compile(r"(## CVE-\d+-\d+\n)(.*?)\n")


def _ensure_trailing_period(match):
    """Return the matched heading and description, with the description ending in ".".

    Descriptions that already end with "." and blank descriptions are
    returned unchanged.
    """
    heading, description = match.group(1), match.group(2)
    if description.endswith(".") or not description.strip():
        return match.group(0)
    return heading + description + ".\n"


# Append "." to every description that lacks one. The substitution is anchored
# to the CVE heading, so only the description line itself is touched: the same
# text appearing elsewhere in the document is left alone, a blank description
# cannot trigger a replacement of the empty string, and the whole document is
# processed in a single pass.
html = _CVE_HEADING_AND_DESCRIPTION.sub(_ensure_trailing_period, html)

# Add an empty line after every line ending in ".", then re-join any list
# item that this pushed onto its own line.
html = html.replace(".\n", ".\n\n")
html = html.replace("\n-", "-")
# Extra trailing newline - presumably so the last entry is terminated like
# the others.
html = html + "\n"
# Extract the records. Each match is (CVE id, description line, PoC line),
# where the PoC line has to be followed by an empty line.
pre_results = re.findall(r"## (CVE-\d+-\d+)\n(.*?)\n(.*?)\n\n", html)

# Build one dictionary per record.
# NOTE: the filters that skipped descriptions starting with
# " ** RE.SERVED ** " or " ** DISP.UTED ** " are disabled, so such entries
# are kept.
results = []
for cve_code, description, poc_line in pre_results:
    # Put every "- " list item back on its own line before scanning for links.
    poc_text = poc_line.replace("- ", "\n- ").strip() + "\n"
    # Keep bracketed link texts that contain "http" and are not blacklisted.
    poc_links = [
        link
        for link in re.findall(r'\[(.*?)\]', poc_text)
        if "http" in link and link not in black_list
    ]
    item = {"cve_code": cve_code, "description": description, "poc": poc_links}
    print(item)
    results.append(item)
# Echo every collected record to stdout.
for record in results:
    print(record)

# Write the records to disk as indented JSON.
with open("cve_description_poc.json", 'w', encoding='utf8') as out_file:
    payload = json.dumps(results, indent=2)
    out_file.write(payload)