forked from openedx/edx-platform
-
Notifications
You must be signed in to change notification settings - Fork 2
/
stringify.py
30 lines (24 loc) · 1.08 KB
/
stringify.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# lint-amnesty, pylint: disable=missing-module-docstring
from lxml import etree
def stringify_children(node):
    '''
    Return all contents of an xml tree, without the outside tags.

    e.g. if node is parse of
        "<html a="b" foo="bar">Hi <div>there <span>Bruce</span><b>!</b></div></html>"
    should return
        "Hi <div>there <span>Bruce</span><b>!</b></div>"

    fixed from
    http://stackoverflow.com/questions/4624062/get-all-text-inside-a-tag-in-lxml

    Arguments:
        node: a parsed element. It must expose `.text` and be iterable over
            its child elements; each child is serialized with
            `etree.tostring`.

    Returns:
        A single string made of the node's leading text followed by every
        child serialized with its own tags and trailing text. The node's own
        start/end tags and attributes are not included. Returns '' when the
        node has neither text nor children.
    '''
    # Useful things to know:

    # etree.tostring(child) -- generates xml for the child, including start
    #                 and end tags.  We'll use this for the children.
    # node.text -- the text after the end of a start tag to the start
    #                 of the first child
    # child.tail -- the text after the end of this tag to the start of the
    #                 next element.  `with_tail=True` makes tostring emit it
    #                 right after the child's end tag.
    #
    # NOTE(review): node.text is appended as-is, while children go through the
    # serializer, so the two pieces are not produced the same way -- confirm
    # callers are fine with that before relying on the result as strict xml.
    parts = [node.text]

    # Iterate the element directly instead of calling `getchildren()`, which
    # is deprecated in lxml and no longer exists on xml.etree elements.
    parts.extend(
        etree.tostring(child, with_tail=True, encoding='unicode')
        for child in node
    )

    # Skip falsy parts: a missing node.text is None, and empty strings add
    # nothing to the output.
    return ''.join(part for part in parts if part)