-
Notifications
You must be signed in to change notification settings - Fork 14
/
xml_parser.py
115 lines (105 loc) · 3.81 KB
/
xml_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import xml.sax.handler
import pprint
import sys
class Argument:
    """One argument of a documented function: a name and a description."""

    def __init__(self):
        # Both fields start empty; FunctionHandler.characters() appends the
        # parsed text to them piece by piece.
        self.name = ""
        self.description = ""

    def __str__(self):
        """Return ``(name, description)`` encoded as ISO-8859-1.

        Characters that ISO-8859-1 cannot represent are replaced with ``?``
        instead of raising UnicodeEncodeError, so printing or repr()-ing an
        argument never fails because of its text content.
        """
        text = "(%s, %s)" % (self.name, self.description)
        return text.encode("ISO-8859-1", "replace")

    def __repr__(self):
        """Return the same representation as ``__str__``."""
        return self.__str__()
class Function:
    """Plain record holding what was parsed for a single function.

    Attributes:
        name: function name text.
        dll: text of the ``dll`` element.
        description: text of the function-level ``description`` element.
        arguments: list of the argument objects appended by the parser.
        returns: text of the ``returns`` element.
    """

    def __init__(self):
        # Text fields start empty because the parser appends to them.
        self.name, self.dll, self.description = "", "", ""
        # Each instance gets its own list of arguments.
        self.arguments = list()
        self.returns = ""

    def __str__(self):
        """Return the function's name."""
        label = self.name
        return label

    def __repr__(self):
        """Return the same text as ``__str__``."""
        return self.__str__()
class FunctionHandler(xml.sax.handler.ContentHandler):
    """SAX content handler that builds Function objects from parsed elements.

    Recognised element names are ``msdn``, ``functions``, ``function``,
    ``name``, ``dll``, ``description``, ``arguments``, ``argument`` and
    ``returns``.  ``name`` and ``description`` are attributed to the current
    function or to the current argument depending on the parser state.  Any
    other element (or a ``name``/``description`` seen in an unexpected state)
    is reported on stdout and terminates the process via ``sys.exit``.

    Every completed ``function`` element is appended once to
    ``self.functions``.
    """

    # Values stored in ``current_step``; 0 means no function has started yet.
    IN_FUNCTION = 1
    IN_FUNCTION_NAME = 2
    IN_DLL = 3
    IN_FUNCTION_DESCRIPTION = 4
    IN_ARGUMENTS = 5
    IN_ARGUMENT = 6
    IN_ARGUMENT_NAME = 7
    IN_ARGUMENT_DESCRIPTION = 8
    IN_RETURNS = 9

    def __init__(self):
        # ContentHandler is an old-style class on Python 2, so call the base
        # initialiser explicitly instead of through super().
        xml.sax.handler.ContentHandler.__init__(self)
        # inTitle and mapping are not read anywhere in this class; they are
        # kept so existing attribute access keeps working.
        self.inTitle = 0
        self.mapping = {}
        self.current_step = 0
        self.functions = []
        # Objects currently being filled in; None until the matching start
        # element has been seen.
        self.function = None
        self.current_argument = None

    def _fail(self, name):
        """Report the unexpected element *name* and terminate the process."""
        # The parenthesised single-argument form behaves the same as a
        # Python 2 print statement and a Python 3 print() call.
        print("Error:  %s" % name)
        # Non-zero status: an unexpected element is an error, not a success.
        sys.exit(1)

    def startElement(self, name, attributes):
        """Update the parser state for the opening tag *name*.

        ``attributes`` is accepted for the SAX interface but not used.
        """
        if name == "msdn":
            pass
        elif name == "functions":
            pass
        elif name == "function":
            self.current_step = FunctionHandler.IN_FUNCTION
            self.function = Function()
        elif self.current_step == FunctionHandler.IN_FUNCTION and name == "name":
            self.current_step = FunctionHandler.IN_FUNCTION_NAME
        elif self.current_step == FunctionHandler.IN_ARGUMENT and name == "name":
            self.current_step = FunctionHandler.IN_ARGUMENT_NAME
        elif name == "dll":
            self.current_step = FunctionHandler.IN_DLL
        elif self.current_step == FunctionHandler.IN_FUNCTION and name == "description":
            self.current_step = FunctionHandler.IN_FUNCTION_DESCRIPTION
        elif self.current_step == FunctionHandler.IN_ARGUMENT and name == "description":
            self.current_step = FunctionHandler.IN_ARGUMENT_DESCRIPTION
        elif name == "arguments":
            self.current_step = FunctionHandler.IN_ARGUMENTS
        elif name == "argument":
            self.current_step = FunctionHandler.IN_ARGUMENT
            self.current_argument = Argument()
        elif name == "returns":
            self.current_step = FunctionHandler.IN_RETURNS
        else:
            self._fail(name)

    def characters(self, data):
        """Append *data* to the field selected by the current state.

        Text is appended rather than assigned, so content delivered in
        several chunks is accumulated.  Text seen in any other state is
        ignored.
        """
        step = self.current_step
        if step == FunctionHandler.IN_FUNCTION_NAME:
            self.function.name = self.function.name + data
        elif step == FunctionHandler.IN_DLL:
            self.function.dll = self.function.dll + data
        elif step == FunctionHandler.IN_FUNCTION_DESCRIPTION:
            self.function.description = self.function.description + data
        elif step == FunctionHandler.IN_ARGUMENT_NAME:
            self.current_argument.name = self.current_argument.name + data
        elif step == FunctionHandler.IN_ARGUMENT_DESCRIPTION:
            self.current_argument.description = self.current_argument.description + data
        elif step == FunctionHandler.IN_RETURNS:
            self.function.returns = self.function.returns + data

    def endElement(self, name):
        """Update the parser state for the closing tag *name*."""
        if name == "function":
            # Store the finished function exactly once, when its own element
            # closes.
            self.functions.append(self.function)
        elif name in ("functions", "msdn"):
            # Container elements: nothing to store.  Appending here would
            # duplicate the last function, or fail when none was parsed.
            pass
        elif self.current_step in (FunctionHandler.IN_ARGUMENT_NAME,
                                   FunctionHandler.IN_ARGUMENT_DESCRIPTION):
            # Leaving a child of <argument>: back to the argument level.
            self.current_step = FunctionHandler.IN_ARGUMENT
        elif name in ("name", "dll", "description", "arguments", "returns"):
            # Leaving a child of <function>: back to the function level.
            self.current_step = FunctionHandler.IN_FUNCTION
        elif name == "argument":
            self.current_step = FunctionHandler.IN_ARGUMENT
            self.function.arguments.append(self.current_argument)
        else:
            self._fail(name)
def parse(xmlfile):
    """Run the SAX parser over *xmlfile* and return the collected functions.

    *xmlfile* is handed unchanged to the SAX parser.  The result is the
    ``functions`` list built by a fresh FunctionHandler.
    """
    collector = FunctionHandler()
    # xml.sax.parse creates a default parser, attaches the handler and
    # parses the source in one call.
    xml.sax.parse(xmlfile, collector)
    return collector.functions
#for function in handler.functions:
# print function.name.encode("ISO-8859-1")
# print function.dll.encode("ISO-8859-1")
# print function.description.encode("ISO-8859-1")
# print function.returns.encode("ISO-8859-1")
#
# for argument in function.arguments:
# print argument