-
Notifications
You must be signed in to change notification settings - Fork 2
/
jatstools.py
22 lines (19 loc) · 1.06 KB
/
jatstools.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
import lxml.etree as ET
class XML:
    """Parsed XML/HTML document with a helper for extracting a section's text."""

    def __init__(self, file):
        """Parse ``file`` with ``lxml.etree.parse`` and keep the resulting tree.

        Args:
            file: Any source accepted by ``lxml.etree.parse``.
        """
        self.file = ET.parse(file)

    @staticmethod
    def _plain_text(node):
        """Return the text content of ``node`` with whitespace runs collapsed."""
        raw = ET.tostring(node, encoding='utf-8', method='text').decode('utf-8')
        # str.split() without arguments already splits on newlines, so no
        # separate newline replacement is needed before re-joining.
        return ' '.join(raw.split())

    def get_text(self, section):
        """Return the whitespace-normalised text of the requested section.

        Lookup order:

        1. The first ``<sec>`` element whose ``sec-type`` attribute contains
           ``section``.
        2. Otherwise (HTML input), the first ``<h2>`` whose lower-cased text
           contains ``section`` (with ``methods`` rewritten to ``method``).
           The section body is the serialised markup between that heading
           and the next ``<h2``.

        Args:
            section: Section name to look for.

        Returns:
            The section text with all whitespace runs collapsed to single
            spaces, or ``''`` when no matching section is found.
        """
        # Pass the name as an XPath variable instead of interpolating it into
        # the expression, so quotes in ``section`` cannot break the query.
        sections = self.file.xpath(
            './/sec[contains(@sec-type, $section)]', section=section
        )
        if sections:
            return self._plain_text(sections[0])

        # for html only
        needle = section.replace('methods', 'method')
        document = None  # serialised lazily, at most once per call
        for header in self.file.xpath('.//h2'):
            # with_tail=False: text following </h2> is not part of the title.
            title = ET.tostring(
                header, encoding='utf-8', method='text', with_tail=False
            ).decode('utf-8')
            if needle not in title.lower():
                continue

            if document is None:
                document = ET.tostring(self.file).decode('utf-8')
            marker = ET.tostring(header, with_tail=False).decode('utf-8')
            _, found, after = document.partition(marker)
            if not found:
                # The heading's standalone serialisation does not occur
                # verbatim in the document; try the next heading rather
                # than failing with an IndexError.
                continue

            section_xml = after.split('<h2')[0]
            # The slice may end with dangling or unbalanced tags. Drop
            # trailing characters until the remainder is well-formed; the
            # empty string always parses, so the loop terminates.
            while True:
                try:
                    fragment = ET.fromstring('<div>' + section_xml + '</div>')
                except ET.XMLSyntaxError:
                    section_xml = section_xml[:-1]
                else:
                    return self._plain_text(fragment)
        return ''