forked from wichmannpas/VersionInferrer
-
Notifications
You must be signed in to change notification settings - Fork 0
/
analyze_site.py
executable file
·129 lines (102 loc) · 4.06 KB
/
analyze_site.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#!/usr/bin/env python3
import json
import logging
import os
import sys
from argparse import ArgumentParser, Namespace
from fnmatch import fnmatch
import settings
from analysis.website_analyzer import WebsiteAnalyzer
from base.json import CustomJSONEncoder
from definitions import definitions
def analyze(arguments: Namespace):
    """Analyse a site to infer its used software package(s) and versions.

    Creates a ``WebsiteAnalyzer`` for ``arguments.primary_url``, copies any
    command line overrides onto the ``settings`` module, then runs either a
    complete index retrieval (when ``complete_index_retrieval_for`` is set)
    or the regular analysis. Human-readable output is printed to stdout
    unless ``json_only`` is set; JSON output goes to ``json_file`` if given,
    otherwise to stdout.

    :param arguments: Parsed command line namespace. Attributes read here:
        ``primary_url``, ``cache_file``, ``persist_resources``, ``json``,
        ``json_only``, ``json_file``, ``debug_json_file``,
        ``complete_index_retrieval_for``, ``dry_run`` and, optionally, one
        lower-cased attribute per name in ``settings.OVERWRITABLE_SETTINGS``.
    :raises ValueError: If ``persist_resources`` points to an existing path
        that is not a directory.
    """
    analyzer = WebsiteAnalyzer(
        primary_url=arguments.primary_url,
        cache_file=arguments.cache_file)

    persist_path = arguments.persist_resources
    if persist_path:
        # Explicit check rather than ``assert`` so the validation is not
        # stripped when Python runs with -O. A not-yet-existing path is
        # accepted; only an existing non-directory is rejected.
        if os.path.exists(persist_path) and not os.path.isdir(persist_path):
            raise ValueError('invalid persist path')
        analyzer.persist_resources = persist_path

    if arguments.json_only:
        # Silence all logging so that stdout/stderr carry JSON only.
        logging.disable(logging.CRITICAL)

    # Command line values that were actually supplied (not None) replace the
    # corresponding module-level settings.
    for setting, _typ in settings.OVERWRITABLE_SETTINGS:
        val = getattr(arguments, setting.lower(), None)
        if val is not None:
            setattr(settings, setting, val)

    if arguments.complete_index_retrieval_for:
        # Case-insensitive glob match against the package name and any
        # alternative names; the pattern is lower-cased once up front.
        pattern = arguments.complete_index_retrieval_for.lower()
        packages = [
            definition.software_package
            for definition in definitions
            if any(
                fnmatch(name.lower(), pattern)
                for name in [definition.software_package.name]
                + (definition.software_package.alternative_names or [])
            )
        ]
        result = analyzer.perform_complete_index_retrieval_for(
            packages, arguments.dry_run)
    else:
        result = analyzer.analyze()

    if not arguments.json_only:
        print(analyzer.get_statistics())

    if arguments.debug_json_file:
        with open(arguments.debug_json_file, 'w') as debug_file:
            json.dump(analyzer.debug_info, debug_file, cls=CustomJSONEncoder)

    # Only a file opened here is owned (and therefore closed) by this
    # function; sys.stdout must stay open for the rest of the process.
    write_to_file = bool(arguments.json_file)
    json_file = open(arguments.json_file, 'w') if write_to_file else sys.stdout
    try:
        if not result:
            # An empty JSON object is always emitted when nothing was found.
            json.dump({}, json_file)
            return

        if not arguments.json_only:
            print(result)

        more_recent = analyzer.more_recent_version(
            guess.software_version for guess in result)
        if more_recent and not arguments.json_only:
            print(
                'More recent version {} released, possibly outdated!'.format(
                    more_recent))

        if arguments.json or arguments.json_only or write_to_file:
            json.dump({
                'result': result,
                'more_recent': more_recent,
            }, json_file, cls=CustomJSONEncoder)
    finally:
        # Runs on the early return above and on errors alike, so the output
        # file is flushed and closed in every case.
        if write_to_file:
            json_file.close()
if __name__ == '__main__':
    # Script entry point: build the command line interface, validate the
    # option combination and hand the parsed namespace to analyze().
    cli = ArgumentParser()
    cli.add_argument('primary_url')

    # One optional flag per overwritable setting; the setting name is
    # lower-cased and underscores become dashes (SOME_NAME -> --some-name).
    # The second tuple element is used as the argparse ``type`` converter.
    for setting_name, converter in settings.OVERWRITABLE_SETTINGS:
        flag = '--' + setting_name.lower().replace('_', '-')
        cli.add_argument(flag, type=converter)

    # (option strings, keyword arguments) for every fixed option, registered
    # in this order so that the generated help keeps its layout.
    option_table = (
        (('--complete-index-retrieval-for', '-c'), {
            'help': 'Retrieve all assets from the index for software packages that match this pattern',
        }),
        (('--dry-run', '-n'), {
            'action': 'store_true',
            'default': False,
            'help': 'Only determine which assets to retrieve',
        }),
        (('--json',), {
            'action': 'store_true',
            'help': 'Write JSON output to stdout.',
        }),
        (('--json-only',), {
            'action': 'store_true',
            'help': 'Only output JSON data to stdout.',
        }),
        (('--json-file',), {
            'help': 'Write JSON output to specified file.',
        }),
        (('--cache-file', '-s'), {
            'help': 'Use specified file as a cache to store retrieved assets.',
        }),
        (('--persist-resources', '-p'), {
            'help': 'Persist retrieved resources within the specified path for debugging purposes.',
        }),
        (('--debug-json-file', '-d'), {
            'help': 'Write JSON debug output to this specified file',
        }),
    )
    for option_strings, keywords in option_table:
        cli.add_argument(*option_strings, **keywords)

    arguments = cli.parse_args()

    # --dry-run only has a meaning together with a complete index retrieval.
    if arguments.dry_run:
        if not arguments.complete_index_retrieval_for:
            raise ValueError('dry run is only valid for complete index retrieval!')

    analyze(arguments)