-
Notifications
You must be signed in to change notification settings - Fork 0
/
cve2csv.py
247 lines (192 loc) · 7.87 KB
/
cve2csv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
"""
cve2csv.py
This module extracts CVE vulnerability information from the MITRE CVE database
and saves the results to a CSV file. It performs web scraping to fetch CVE
records based on keywords provided via the command line and converts the results
into a pandas `DataFrame` for easy processing.
Functions:
----------
- get_results_number(soup: BeautifulSoup) -> int:
Extracts and returns the number of vulnerability search results.
- extract_table_data(soup: BeautifulSoup) -> DataFrame | None:
Extracts CVE table data from the HTML page and converts it into a pandas \
`DataFrame`.
Usage:
------
Run this module from the command line with keywords for the search, for example:
python cve2csv.py keyword
This generates a file `cve.csv` in the current directory containing the CVE
search results in CSV format.
Dependencies:
-------------
This module requires the following libraries:
- pandas
- requests
- BeautifulSoup (from bs4)
"""
import logging
import pandas as pd
import requests
import sys
from argparse import ArgumentParser
from pandas import DataFrame
from requests import Response
from bs4 import BeautifulSoup
URL: str = "https://cve.mitre.org/cgi-bin/cvekey.cgi?keyword="
CSV_FILE_NAME: str = 'cve.csv'
def fetch_cve_data(keyword: str) -> str:
"""
Fetches the raw HTML content from the MITRE CVE database for a given search
keyword.
This function builds a URL using the specified keyword to search the MITRE
CVE database and sends an HTTP GET request to retrieve the HTML content. If
the request is successful, it returns the raw HTML as a string. In case of a
request error (e.g., network issues, invalid URL), an error message is
logged, and the program exits.
Parameters
----------
keyword : str
The search keyword used to filter CVE records in the MITRE CVE database.
Returns
-------
str
The raw HTML content of the CVE search results page as a string.
Raises
------
SystemExit
If the HTTP request fails, the function logs the error and exits the
program.
Example
-------
>>> html_content = fetch_cve_data("buffer overflow")
>>> isinstance(html_content, str)
True
"""
try:
response: Response = requests.get(URL + keyword)
response.raise_for_status()
return response.text
except requests.exceptions.RequestException as e:
logging.error(f"Error fetching data: {e}")
sys.exit(1)
def get_results_number(soup: BeautifulSoup) -> int:
"""Get the rumber of results from the response.
Response HTML example::
.. highlight:: html
.. code-block:: html
<h2>Search Results</h2>
<div class="smaller" style="...">
There are <b>0</b> CVE Records that match your search.
</div>
Parameters
----------
soup (BeautifulSoup): A `BeautifulSoup` object containing the parsed\
HTML content.
Returns
-------
int | None: Number of results if possible, otherwise `None`.
"""
return int(b.text) if (h2 := soup.find('h2', string='Search Results'))\
and (div := h2.find_next_sibling())\
and (b := div.find('b')) else 0
def extract_table_data(soup: BeautifulSoup) -> DataFrame | None:
"""Extracts table data from a `BeautifulSoup` object and converts it into a
pandas `DataFrame`.
This function searches for a `<div>` element with the id 'TableWithRules'
within the provided `BeautifulSoup` object. If such a `<div>` is found and
it contains a <table> element, the function extracts the text content of all
`<th>` and `<td>` elements within the table, organizes them into rows, and
converts these rows into a pandas `DataFrame`. The first row of the table
is used as the column headers for the `DataFrame`.
Parameters
----------
soup (BeautifulSoup): A BeautifulSoup object containing the parsed HTML\
content.
Returns
-------
DataFrame | None: A pandas `DataFrame` containing the table data if the\
table is found, otherwise `None`.
"""
if (div := soup.find(id='TableWithRules')) and (table := div.find('table')):
rows: list = [[ele.text.strip() for ele in row.find_all(['th', 'td'])]
for row in table.find_all('tr')]
if not rows or len(rows) < 2:
logging.warning("No valid data found in table.")
return None
if len(rows[0]) != len(rows[1:][0]):
logging.error("Mismatch between header and row columns.")
return None
return pd.DataFrame(rows[1:], columns=rows[0])
logging.warning("No table found with id 'TableWithRules'.")
return None
def save_to_csv(df: DataFrame, output_file: str, delimiter: str = ',',
encoding: str = 'utf-8'):
"""Saves DataFrame to a CSV file with the specified delimiter and encoding.
"""
df.to_csv(output_file, index=False, sep=delimiter, encoding=encoding)
logging.info(f"Data saved to '{output_file}' with delimiter '{delimiter}' "
+ f"and encoding '{encoding}'.")
def create_parser() -> ArgumentParser:
"""
Creates and configures the argument parser for the script.
Returns
-------
argparse.ArgumentParser
Configured ArgumentParser with expected arguments for the script.
"""
parser = ArgumentParser(
description="Fetch CVE data from MITRE and save to CSV.")
parser.add_argument("keyword", help="Search keyword for CVE entries.")
parser.add_argument("-o", "--output", default=CSV_FILE_NAME,
help="Output CSV file name.")
parser.add_argument("-d", "--delimiter", default=',',
help="CSV delimiter.")
parser.add_argument("-e", "--encoding",
default='utf-8', help="CSV encoding.")
# Verbose and quiet options
verbosity_group = parser.add_mutually_exclusive_group()
verbosity_group.add_argument("-v", "--verbose", action="count", default=0,
help="Increase output verbosity."
+ "Use -v, -vv, or -vvv for more verbosity.")
verbosity_group.add_argument("-q", "--quiet", action="store_true",
help="Suppress all output except errors.")
return parser
def configure_logging(verbosity: int, quiet: bool):
"""
Configures logging level based on verbosity and quiet flags.
Parameters
----------
verbosity : int
Verbosity level (0 for default, increases with additional 'v').
quiet : bool
If True, suppresses all logging output except errors.
"""
if quiet:
log_level = logging.CRITICAL
else:
# Mapping verbosity levels to logging levels
log_level = {
0: logging.WARNING, # Default
1: logging.INFO, # -v
2: logging.DEBUG, # -vv
}.get(verbosity, logging.DEBUG) # -vvv or more defaults to DEBUG
logging.basicConfig(
format='%(asctime)s - %(levelname)s - %(message)s',
level=log_level
)
def main(keyword, output, delimiter, encoding):
"""Main function to fetch CVE data and save to CSV."""
logging.info(f'Getting results from {URL}{keyword}')
soup: BeautifulSoup = BeautifulSoup(fetch_cve_data(keyword), 'html.parser')
if n_results := get_results_number(soup):
logging.info(f"There are {n_results} Records that match your search.")
if (df := extract_table_data(soup)) is not None:
save_to_csv(df, output, delimiter=delimiter, encoding=encoding)
else:
logging.warning("No valid data to save; exiting.")
else:
logging.warning("No CVE records found for the given keyword.")
if __name__ == '__main__':
args = create_parser().parse_args()
configure_logging(args.verbose, args.quiet)
main(args.keyword, args.output, args.delimiter, args.encoding)