1
2
3 """
4 Tests specific to the extended etree API
5
6 Tests that apply to the general ElementTree API should go into
7 test_elementtree
8 """
9
10 from __future__ import absolute_import
11
12 import os.path
13 import unittest
14 import copy
15 import sys
16 import re
17 import gc
18 import operator
19 import tempfile
20 import textwrap
21 import zlib
22 import gzip
23 from contextlib import closing, contextmanager
24
25 from .common_imports import etree, StringIO, BytesIO, HelperTestCase
26 from .common_imports import fileInTestDir, fileUrlInTestDir, read_file, path2url
27 from .common_imports import SillyFileLike, LargeFileLikeUnicode, doctest, make_doctest
28 from .common_imports import canonicalize, _str, _bytes
29
# Report which library versions this test run exercises.
print("")
print("TESTED VERSION: %s" % etree.__version__)
print(" Python: " + repr(sys.version_info))
print(" lxml.etree: " + repr(etree.LXML_VERSION))
print(" libxml used: " + repr(etree.LIBXML_VERSION))
print(" libxml compiled: " + repr(etree.LIBXML_COMPILED_VERSION))
print(" libxslt used: " + repr(etree.LIBXSLT_VERSION))
print(" libxslt compiled: " + repr(etree.LIBXSLT_COMPILED_VERSION))
print("")

# Alias for the text type: ``unicode`` when that builtin exists, else ``str``.
try:
    _unicode = unicode
except NameError:
    # ``unicode`` is not defined in this interpreter; ``str`` is used instead.
    _unicode = str
45
46
47 @contextmanager
48 -def tmpfile():
55
58 """Tests only for etree, not ElementTree"""
59 etree = etree
60
71
80
88
95
97 Element = self.etree.Element
98 el = Element('name')
99 self.assertRaises(ValueError, Element, '{}')
100 self.assertRaises(ValueError, setattr, el, 'tag', '{}')
101
102 self.assertRaises(ValueError, Element, '{test}')
103 self.assertRaises(ValueError, setattr, el, 'tag', '{test}')
104
106 Element = self.etree.Element
107 self.assertRaises(ValueError, Element, 'p:name')
108 self.assertRaises(ValueError, Element, '{test}p:name')
109
110 el = Element('name')
111 self.assertRaises(ValueError, setattr, el, 'tag', 'p:name')
112
114 Element = self.etree.Element
115 self.assertRaises(ValueError, Element, "p'name")
116 self.assertRaises(ValueError, Element, 'p"name')
117
118 self.assertRaises(ValueError, Element, "{test}p'name")
119 self.assertRaises(ValueError, Element, '{test}p"name')
120
121 el = Element('name')
122 self.assertRaises(ValueError, setattr, el, 'tag', "p'name")
123 self.assertRaises(ValueError, setattr, el, 'tag', 'p"name')
124
126 Element = self.etree.Element
127 self.assertRaises(ValueError, Element, ' name ')
128 self.assertRaises(ValueError, Element, 'na me')
129 self.assertRaises(ValueError, Element, '{test} name')
130
131 el = Element('name')
132 self.assertRaises(ValueError, setattr, el, 'tag', ' name ')
133
141
149
151 Element = self.etree.Element
152 SubElement = self.etree.SubElement
153
154 el = Element('name')
155 self.assertRaises(ValueError, SubElement, el, "p'name")
156 self.assertRaises(ValueError, SubElement, el, "{test}p'name")
157
158 self.assertRaises(ValueError, SubElement, el, 'p"name')
159 self.assertRaises(ValueError, SubElement, el, '{test}p"name')
160
169
178
180 QName = self.etree.QName
181 self.assertRaises(ValueError, QName, '')
182 self.assertRaises(ValueError, QName, None)
183 self.assertRaises(ValueError, QName, None, None)
184 self.assertRaises(ValueError, QName, 'test', '')
185
192
194 QName = self.etree.QName
195 self.assertRaises(ValueError, QName, 'p:name')
196 self.assertRaises(ValueError, QName, 'test', 'p:name')
197
199 QName = self.etree.QName
200 self.assertRaises(ValueError, QName, ' name ')
201 self.assertRaises(ValueError, QName, 'na me')
202 self.assertRaises(ValueError, QName, 'test', ' name')
203
211
213
214 QName = self.etree.QName
215 qname1 = QName('http://myns', 'a')
216 a = self.etree.Element(qname1, nsmap={'p' : 'http://myns'})
217
218 qname2 = QName(a)
219 self.assertEqual(a.tag, qname1.text)
220 self.assertEqual(a.tag, qname1)
221 self.assertEqual(qname1.text, qname2.text)
222 self.assertEqual(qname1, qname2.text)
223 self.assertEqual(qname1.text, qname2)
224 self.assertEqual(qname1, qname2)
225
227
228 etree = self.etree
229 qname = etree.QName('http://myns', 'a')
230 a = etree.Element(qname, nsmap={'p' : 'http://myns'})
231 a.text = qname
232
233 self.assertEqual("p:a", a.text)
234
243
258
264
274
286
288 Element = self.etree.Element
289
290 keys = ["attr%d" % i for i in range(10)]
291 values = ["TEST-%d" % i for i in range(10)]
292 items = list(zip(keys, values))
293
294 root = Element("root")
295 for key, value in items:
296 root.set(key, value)
297 self.assertEqual(keys, root.attrib.keys())
298 self.assertEqual(values, root.attrib.values())
299
300 root2 = Element("root2", root.attrib,
301 attr_99='TOAST-1', attr_98='TOAST-2')
302 self.assertEqual(['attr_98', 'attr_99'] + keys,
303 root2.attrib.keys())
304 self.assertEqual(['TOAST-2', 'TOAST-1'] + values,
305 root2.attrib.values())
306
307 self.assertEqual(keys, root.attrib.keys())
308 self.assertEqual(values, root.attrib.values())
309
311
312
313 Element = self.etree.Element
314 root = Element("root")
315 self.assertRaises(TypeError, root.set, "newattr", 5)
316 self.assertRaises(TypeError, root.set, "newattr", object)
317 self.assertRaises(TypeError, root.set, "newattr", None)
318 self.assertRaises(TypeError, root.set, "newattr")
319
333
355
357 XML = self.etree.XML
358 xml = _bytes('<test><a><b><c/></b></a><x><a><b/><c/></a></x></test>')
359
360 root = XML(xml)
361 self.etree.strip_elements(root, 'a')
362 self.assertEqual(_bytes('<test><x></x></test>'),
363 self._writeElement(root))
364
365 root = XML(xml)
366 self.etree.strip_elements(root, 'b', 'c', 'X', 'Y', 'Z')
367 self.assertEqual(_bytes('<test><a></a><x><a></a></x></test>'),
368 self._writeElement(root))
369
370 root = XML(xml)
371 self.etree.strip_elements(root, 'c')
372 self.assertEqual(_bytes('<test><a><b></b></a><x><a><b></b></a></x></test>'),
373 self._writeElement(root))
374
376 XML = self.etree.XML
377 xml = _bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"/>C</b>BT</n:a>AT<x>X<a>A<b xmlns="urn:a"/>BT<c xmlns="urn:x"/>CT</a>AT</x>XT</test>')
378
379 root = XML(xml)
380 self.etree.strip_elements(root, 'a')
381 self.assertEqual(_bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"></c>C</b>BT</n:a>AT<x>X</x>XT</test>'),
382 self._writeElement(root))
383
384 root = XML(xml)
385 self.etree.strip_elements(root, '{urn:a}b', 'c')
386 self.assertEqual(_bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"></c>C</b>BT</n:a>AT<x>X<a>A<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
387 self._writeElement(root))
388
389 root = XML(xml)
390 self.etree.strip_elements(root, '{urn:a}*', 'c')
391 self.assertEqual(_bytes('<test>TEST<x>X<a>A<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
392 self._writeElement(root))
393
394 root = XML(xml)
395 self.etree.strip_elements(root, '{urn:a}*', 'c', with_tail=False)
396 self.assertEqual(_bytes('<test>TESTAT<x>X<a>ABT<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
397 self._writeElement(root))
398
417
443
470
497
516
529
540
546
548 XML = self.etree.XML
549 root = XML(_bytes("<test><?mypi my='1' test=\" abc \" quotes=\"' '\" only names ?></test>"))
550 self.assertEqual(root[0].target, "mypi")
551 self.assertEqual(root[0].get('my'), "1")
552 self.assertEqual(root[0].get('test'), " abc ")
553 self.assertEqual(root[0].get('quotes'), "' '")
554 self.assertEqual(root[0].get('only'), None)
555 self.assertEqual(root[0].get('names'), None)
556 self.assertEqual(root[0].get('nope'), None)
557
559 XML = self.etree.XML
560 root = XML(_bytes("<test><?mypi my='1' test=\" abc \" quotes=\"' '\" only names ?></test>"))
561 self.assertEqual(root[0].target, "mypi")
562 self.assertEqual(root[0].attrib['my'], "1")
563 self.assertEqual(root[0].attrib['test'], " abc ")
564 self.assertEqual(root[0].attrib['quotes'], "' '")
565 self.assertRaises(KeyError, root[0].attrib.__getitem__, 'only')
566 self.assertRaises(KeyError, root[0].attrib.__getitem__, 'names')
567 self.assertRaises(KeyError, root[0].attrib.__getitem__, 'nope')
568
570
571 ProcessingInstruction = self.etree.ProcessingInstruction
572
573 a = ProcessingInstruction("PI", "ONE")
574 b = copy.deepcopy(a)
575 b.text = "ANOTHER"
576
577 self.assertEqual('ONE', a.text)
578 self.assertEqual('ANOTHER', b.text)
579
595
610
621
633
652
657
670
681
682 f = BytesIO('<a><!--A--><b><!-- B --><c/></b><!--C--></a>')
683 events = list(iterparse(f, events=('end', 'comment')))
684 root = events[-1][1]
685 self.assertEqual(6, len(events))
686 self.assertEqual(['A', ' B ', 'c', 'b', 'C', 'a'],
687 [ name(*item) for item in events ])
688 self.assertEqual(
689 _bytes('<a><!--A--><b><!-- B --><c/></b><!--C--></a>'),
690 tostring(root))
691
703
704 f = BytesIO('<?pia a?><a><?pib b?><b><?pic c?><c/></b><?pid d?></a><?pie e?>')
705 events = list(iterparse(f, events=('end', 'pi')))
706 root = events[-2][1]
707 self.assertEqual(8, len(events))
708 self.assertEqual([('pia','a'), ('pib','b'), ('pic','c'), 'c', 'b',
709 ('pid','d'), 'a', ('pie','e')],
710 [ name(*item) for item in events ])
711 self.assertEqual(
712 _bytes('<?pia a?><a><?pib b?><b><?pic c?><c/></b><?pid d?></a><?pie e?>'),
713 tostring(ElementTree(root)))
714
729
735
737 iterparse = self.etree.iterparse
738 f = BytesIO('<a><b><c/></a>')
739 it = iterparse(f, events=('start', 'end'), recover=True)
740 events = [(ev, el.tag) for ev, el in it]
741 root = it.root
742 self.assertTrue(root is not None)
743
744 self.assertEqual(1, events.count(('start', 'a')))
745 self.assertEqual(1, events.count(('end', 'a')))
746
747 self.assertEqual(1, events.count(('start', 'b')))
748 self.assertEqual(1, events.count(('end', 'b')))
749
750 self.assertEqual(1, events.count(('start', 'c')))
751 self.assertEqual(1, events.count(('end', 'c')))
752
754 iterparse = self.etree.iterparse
755 f = BytesIO('<a><b><c/></d><b><c/></a></b>')
756 it = iterparse(f, events=('start', 'end'), recover=True)
757 events = [(ev, el.tag) for ev, el in it]
758 root = it.root
759 self.assertTrue(root is not None)
760
761 self.assertEqual(1, events.count(('start', 'a')))
762 self.assertEqual(1, events.count(('end', 'a')))
763
764 self.assertEqual(2, events.count(('start', 'b')))
765 self.assertEqual(2, events.count(('end', 'b')))
766
767 self.assertEqual(2, events.count(('start', 'c')))
768 self.assertEqual(2, events.count(('end', 'c')))
769
771 iterparse = self.etree.iterparse
772 f = BytesIO("""
773 <a> \n \n <b> b test </b> \n
774
775 \n\t <c> \n </c> </a> \n """)
776 iterator = iterparse(f, remove_blank_text=True)
777 text = [ (element.text, element.tail)
778 for event, element in iterator ]
779 self.assertEqual(
780 [(" b test ", None), (" \n ", None), (None, None)],
781 text)
782
784 iterparse = self.etree.iterparse
785 f = BytesIO('<a><b><d/></b><c/></a>')
786
787 iterator = iterparse(f, tag="b", events=('start', 'end'))
788 events = list(iterator)
789 root = iterator.root
790 self.assertEqual(
791 [('start', root[0]), ('end', root[0])],
792 events)
793
795 iterparse = self.etree.iterparse
796 f = BytesIO('<a><b><d/></b><c/></a>')
797
798 iterator = iterparse(f, tag="*", events=('start', 'end'))
799 events = list(iterator)
800 self.assertEqual(
801 8,
802 len(events))
803
805 iterparse = self.etree.iterparse
806 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
807
808 iterator = iterparse(f, tag="{urn:test:1}b", events=('start', 'end'))
809 events = list(iterator)
810 root = iterator.root
811 self.assertEqual(
812 [('start', root[0]), ('end', root[0])],
813 events)
814
816 iterparse = self.etree.iterparse
817 f = BytesIO('<a><b><d/></b><c/></a>')
818 iterator = iterparse(f, tag="{}b", events=('start', 'end'))
819 events = list(iterator)
820 root = iterator.root
821 self.assertEqual(
822 [('start', root[0]), ('end', root[0])],
823 events)
824
825 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
826 iterator = iterparse(f, tag="{}b", events=('start', 'end'))
827 events = list(iterator)
828 root = iterator.root
829 self.assertEqual([], events)
830
832 iterparse = self.etree.iterparse
833 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
834 iterator = iterparse(f, tag="{urn:test:1}*", events=('start', 'end'))
835 events = list(iterator)
836 self.assertEqual(8, len(events))
837
839 iterparse = self.etree.iterparse
840 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
841 iterator = iterparse(f, tag="{}*", events=('start', 'end'))
842 events = list(iterator)
843 self.assertEqual([], events)
844
845 f = BytesIO('<a><b><d/></b><c/></a>')
846 iterator = iterparse(f, tag="{}*", events=('start', 'end'))
847 events = list(iterator)
848 self.assertEqual(8, len(events))
849
851 text = _str('Søk på nettet')
852 wrong_declaration = "<?xml version='1.0' encoding='UTF-8'?>"
853 xml_latin1 = (_str('%s<a>%s</a>') % (wrong_declaration, text)
854 ).encode('iso-8859-1')
855
856 self.assertRaises(self.etree.ParseError,
857 list, self.etree.iterparse(BytesIO(xml_latin1)))
858
860 text = _str('Søk på nettet', encoding="UTF-8")
861 wrong_declaration = "<?xml version='1.0' encoding='UTF-8'?>"
862 xml_latin1 = (_str('%s<a>%s</a>') % (wrong_declaration, text)
863 ).encode('iso-8859-1')
864
865 iterator = self.etree.iterparse(BytesIO(xml_latin1),
866 encoding="iso-8859-1")
867 self.assertEqual(1, len(list(iterator)))
868
869 a = iterator.root
870 self.assertEqual(a.text, text)
871
873 tostring = self.etree.tostring
874 f = BytesIO('<root><![CDATA[test]]></root>')
875 context = self.etree.iterparse(f, strip_cdata=False)
876 content = [ el.text for event,el in context ]
877
878 self.assertEqual(['test'], content)
879 self.assertEqual(_bytes('<root><![CDATA[test]]></root>'),
880 tostring(context.root))
881
885
890
909
910
911
934
935
936
938 assertEqual = self.assertEqual
939 assertFalse = self.assertFalse
940
941 events = []
class Target(object):
    """Parser target that logs "start"/"end" and checks each reported tag.

    Uses ``events``, ``assertEqual`` and ``assertFalse`` from the
    surrounding scope.
    """

    def close(self):
        """Return the string ``"DONE"``."""
        return "DONE"

    def end(self, tag):
        """Log ``"end"``, then check that the tag is ``"TAG"``."""
        events.append("end")
        assertEqual("TAG", tag)

    def start(self, tag, attrib):
        """Log ``"start"``, then check for no attributes and the tag name."""
        # The event is appended before the checks run, so it is logged even
        # when one of the checks raises.
        events.append("start")
        assertFalse(attrib)
        assertEqual("TAG", tag)
952
953 parser = self.etree.XMLParser(target=Target())
954 tree = self.etree.ElementTree()
955
956 self.assertRaises(TypeError,
957 tree.parse, BytesIO("<TAG/>"), parser=parser)
958 self.assertEqual(["start", "end"], events)
959
961
962 events = []
class Target(object):
    """Event-logging parser target whose ``end`` raises for the ``a`` tag.

    Every callback appends a ``"<kind>-<value>"`` string to ``events`` from
    the surrounding scope.
    """

    def _log(self, kind, detail):
        # Shared formatting for all logged callbacks.
        events.append(kind + "-" + detail)

    def start(self, tag, attrib):
        """Log the opening of ``tag``; ``attrib`` is ignored."""
        self._log("start", tag)

    def end(self, tag):
        """Log the closing of ``tag`` and raise ``ValueError`` for ``a``."""
        self._log("end", tag)
        if tag != 'a':
            return
        # The event is already logged at this point, so it stays in ``events``.
        raise ValueError("dead and gone")

    def data(self, data):
        """Log a chunk of character data."""
        self._log("data", data)

    def close(self):
        """Log ``"close"`` and return ``"DONE"``."""
        events.append("close")
        return "DONE"
975
976 parser = self.etree.XMLParser(target=Target())
977
978 try:
979 parser.feed(_bytes('<root>A<a>ca</a>B</root>'))
980 done = parser.close()
981 self.fail("error expected, but parsing succeeded")
982 except ValueError:
983 done = 'value error received as expected'
984
985 self.assertEqual(["start-root", "data-A", "start-a",
986 "data-ca", "end-a", "close"],
987 events)
988
990
991 events = []
class Target(object):
    """Parser target that logs callbacks and fails when ``a`` is closed.

    Entries go to ``events`` (from the surrounding scope) in the form
    ``"<kind>-<value>"``.
    """

    def close(self):
        """Append ``"close"`` to the log and return ``"DONE"``."""
        events.append("close")
        return "DONE"

    def data(self, data):
        """Append a ``data-`` entry for the received text."""
        entry = "data-" + data
        events.append(entry)

    def end(self, tag):
        """Append an ``end-`` entry; raise ``ValueError`` when tag is ``a``."""
        entry = "end-" + tag
        events.append(entry)
        # Raised only after the entry was recorded.
        if 'a' == tag:
            raise ValueError("dead and gone")

    def start(self, tag, attrib):
        """Append a ``start-`` entry; attributes are not looked at."""
        entry = "start-" + tag
        events.append(entry)
1004
1005 parser = self.etree.XMLParser(target=Target())
1006
1007 try:
1008 done = self.etree.fromstring(_bytes('<root>A<a>ca</a>B</root>'),
1009 parser=parser)
1010 self.fail("error expected, but parsing succeeded")
1011 except ValueError:
1012 done = 'value error received as expected'
1013
1014 self.assertEqual(["start-root", "data-A", "start-a",
1015 "data-ca", "end-a", "close"],
1016 events)
1017
1019
1020 events = []
class Target(object):
    """Parser target logging start, end, data and comment callbacks.

    Each callback appends ``"<kind>-<value>"`` to ``events`` from the
    surrounding scope; ``close`` logs nothing.
    """

    def _log(self, kind, detail):
        # Single place where log entries are built.
        events.append(kind + "-" + detail)

    def start(self, tag, attrib):
        """Log an element start; ``attrib`` is ignored."""
        self._log("start", tag)

    def end(self, tag):
        """Log an element end."""
        self._log("end", tag)

    def data(self, data):
        """Log character data."""
        self._log("data", data)

    def comment(self, text):
        """Log a comment's text."""
        self._log("comment", text)

    def close(self):
        """Return ``"DONE"`` without touching the log."""
        return "DONE"
1032
1033 parser = self.etree.XMLParser(target=Target(), collect_ids=False)
1034
1035 parser.feed(_bytes('<!--a--><root xml:id="123">A<!--b-->'))
1036 parser.feed(_bytes('<sub xml:id="321"/>B</root>'))
1037 done = parser.close()
1038
1039 self.assertEqual("DONE", done)
1040 self.assertEqual(["comment-a", "start-root", "data-A", "comment-b",
1041 "start-sub", "end-sub", "data-B", "end-root"],
1042 events)
1043
1049 def end(self, tag):
1050 events.append("end-" + tag)
1051 def data(self, data):
1052 events.append("data-" + data)
1053 def comment(self, text):
1054 events.append("comment-" + text)
1055 def close(self):
1056 return "DONE"
1057
1058 parser = self.etree.XMLParser(target=Target())
1059
1060 parser.feed(_bytes('<!--a--><root>A<!--b--><sub/><!--c-->B</root><!--d-->'))
1061 done = parser.close()
1062
1063 self.assertEqual("DONE", done)
1064 self.assertEqual(["comment-a", "start-root", "data-A", "comment-b",
1065 "start-sub", "end-sub", "comment-c", "data-B",
1066 "end-root", "comment-d"],
1067 events)
1068
1070 events = []
class Target(object):
    """Parser target logging start, end, data and processing instructions.

    Entries are appended to ``events`` from the surrounding scope; ``close``
    logs nothing.
    """

    def _log(self, *parts):
        # Joins the parts with "-" to build entries such as "pi-test-a".
        events.append("-".join(parts))

    def start(self, tag, attrib):
        """Log an element start; ``attrib`` is ignored."""
        self._log("start", tag)

    def end(self, tag):
        """Log an element end."""
        self._log("end", tag)

    def data(self, data):
        """Log character data."""
        self._log("data", data)

    def pi(self, target, data):
        """Log a processing instruction as ``pi-<target>-<data>``."""
        self._log("pi", target, data)

    def close(self):
        """Return ``"DONE"``."""
        return "DONE"
1082
1083 parser = self.etree.XMLParser(target=Target())
1084
1085 parser.feed(_bytes('<?test a?><root>A<?test b?>B</root><?test c?>'))
1086 done = parser.close()
1087
1088 self.assertEqual("DONE", done)
1089 self.assertEqual(["pi-test-a", "start-root", "data-A", "pi-test-b",
1090 "data-B", "end-root", "pi-test-c"],
1091 events)
1092
1094 events = []
class Target(object):
    """Parser target that logs start, end and data callbacks.

    Entries of the form ``"<kind>-<value>"`` are appended to ``events`` from
    the surrounding scope; ``close`` logs nothing.
    """

    def close(self):
        """Return ``"DONE"``."""
        return "DONE"

    def data(self, data):
        """Log received character data."""
        entry = "data-" + data
        events.append(entry)

    def end(self, tag):
        """Log the end of an element."""
        entry = "end-" + tag
        events.append(entry)

    def start(self, tag, attrib):
        """Log the start of an element; ``attrib`` is ignored."""
        entry = "start-" + tag
        events.append(entry)
1104
1105 parser = self.etree.XMLParser(target=Target(),
1106 strip_cdata=False)
1107
1108 parser.feed(_bytes('<root>A<a><![CDATA[ca]]></a>B</root>'))
1109 done = parser.close()
1110
1111 self.assertEqual("DONE", done)
1112 self.assertEqual(["start-root", "data-A", "start-a",
1113 "data-ca", "end-a", "data-B", "end-root"],
1114 events)
1115
1117 events = []
class Target(object):
    """Parser target that logs start, end, data and close callbacks.

    Entries are appended to ``events`` from the surrounding scope.
    """

    def _log(self, kind, detail):
        # Builds the "<kind>-<value>" entries used by start/end/data.
        events.append(kind + "-" + detail)

    def start(self, tag, attrib):
        """Log an element start; ``attrib`` is ignored."""
        self._log("start", tag)

    def end(self, tag):
        """Log an element end."""
        self._log("end", tag)

    def data(self, data):
        """Log character data."""
        self._log("data", data)

    def close(self):
        """Log a bare ``"close"`` entry and return ``"DONE"``."""
        events.append("close")
        return "DONE"
1128
1129 parser = self.etree.XMLParser(target=Target(),
1130 recover=True)
1131
1132 parser.feed(_bytes('<root>A<a>ca</a>B</not-root>'))
1133 done = parser.close()
1134
1135 self.assertEqual("DONE", done)
1136 self.assertEqual(["start-root", "data-A", "start-a",
1137 "data-ca", "end-a", "data-B",
1138 "end-root", "close"],
1139 events)
1140
1150
1160
1169
1179
1181 iterwalk = self.etree.iterwalk
1182 root = self.etree.XML(_bytes('<a><b></b><c/></a>'))
1183
1184 iterator = iterwalk(root, events=('start','end'))
1185 events = list(iterator)
1186 self.assertEqual(
1187 [('start', root), ('start', root[0]), ('end', root[0]),
1188 ('start', root[1]), ('end', root[1]), ('end', root)],
1189 events)
1190
1201
1203 iterwalk = self.etree.iterwalk
1204 root = self.etree.XML(_bytes('<a xmlns="ns1"><b><c xmlns="ns2"/></b></a>'))
1205
1206 attr_name = '{testns}bla'
1207 events = []
1208 iterator = iterwalk(root, events=('start','end','start-ns','end-ns'))
1209 for event, elem in iterator:
1210 events.append(event)
1211 if event == 'start':
1212 if elem.tag != '{ns1}a':
1213 elem.set(attr_name, 'value')
1214
1215 self.assertEqual(
1216 ['start-ns', 'start', 'start', 'start-ns', 'start',
1217 'end', 'end-ns', 'end', 'end', 'end-ns'],
1218 events)
1219
1220 self.assertEqual(
1221 None,
1222 root.get(attr_name))
1223 self.assertEqual(
1224 'value',
1225 root[0].get(attr_name))
1226
1239
1241 iterwalk = self.etree.iterwalk
1242 root = self.etree.XML(_bytes('<a><b><c/></b><d><e/></d></a>'))
1243
1244 iterator = iterwalk(root, events=('start', 'end'))
1245 tags = []
1246 for event, elem in iterator:
1247 tags.append((event, elem.tag))
1248 if elem.tag in ('b', 'e'):
1249
1250 iterator.skip_subtree()
1251
1252 self.assertEqual(
1253 [('start', 'a'),
1254 ('start', 'b'), ('end', 'b'),
1255 ('start', 'd'),
1256 ('start', 'e'), ('end', 'e'),
1257 ('end', 'd'),
1258 ('end', 'a')],
1259 tags)
1260
1262 iterwalk = self.etree.iterwalk
1263 root = self.etree.XML(_bytes(
1264 '<a xmlns="ns1"><b xmlns="nsb"><c xmlns="ns2"/></b><d xmlns="ns2"><e/></d></a>'))
1265
1266 events = []
1267 iterator = iterwalk(root, events=('start','start-ns','end-ns'))
1268 for event, elem in iterator:
1269 if event in ('start-ns', 'end-ns'):
1270 events.append((event, elem))
1271 if event == 'start-ns' and elem == ('', 'nsb'):
1272 events.append('skip')
1273 iterator.skip_subtree()
1274 else:
1275 events.append((event, elem.tag))
1276
1277 self.assertEqual(
1278 [('start-ns', ('', 'ns1')),
1279 ('start', '{ns1}a'),
1280 ('start-ns', ('', 'nsb')),
1281 'skip',
1282 ('start', '{nsb}b'),
1283 ('end-ns', None),
1284 ('start-ns', ('', 'ns2')),
1285 ('start', '{ns2}d'),
1286 ('start', '{ns2}e'),
1287 ('end-ns', None),
1288 ('end-ns', None)
1289 ],
1290 events)
1291
1302
1304 parse = self.etree.parse
1305 parser = self.etree.XMLParser(dtd_validation=True)
1306 assertEqual = self.assertEqual
1307 test_url = _str("__nosuch.dtd")
1308
class MyResolver(self.etree.Resolver):
    """Resolver that answers the expected URL with DTD text held in memory."""
    def resolve(self, url, id, context):
        # The requested URL must equal ``test_url`` from the surrounding scope.
        assertEqual(url, test_url)
        # The DTD text declares ``myentity`` with the requested URL as its
        # replacement text and is handed over via ``resolve_string``.
        return self.resolve_string(
            _str('''<!ENTITY myentity "%s">
<!ELEMENT doc ANY>''') % url, context)
1315
1316 parser.resolvers.add(MyResolver())
1317
1318 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1319 tree = parse(StringIO(xml), parser)
1320 root = tree.getroot()
1321 self.assertEqual(root.text, test_url)
1322
1324 parse = self.etree.parse
1325 parser = self.etree.XMLParser(dtd_validation=True)
1326 assertEqual = self.assertEqual
1327 test_url = _str("__nosuch.dtd")
1328
class MyResolver(self.etree.Resolver):
    """Resolver that answers the expected URL with UTF-8 encoded DTD bytes."""
    def resolve(self, url, id, context):
        # The requested URL must equal ``test_url`` from the surrounding scope.
        assertEqual(url, test_url)
        # Same DTD text as a string, but encoded to UTF-8 before being passed
        # to ``resolve_string``.
        return self.resolve_string(
            (_str('''<!ENTITY myentity "%s">
<!ELEMENT doc ANY>''') % url).encode('utf-8'),
            context)
1336
1337 parser.resolvers.add(MyResolver())
1338
1339 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1340 tree = parse(StringIO(xml), parser)
1341 root = tree.getroot()
1342 self.assertEqual(root.text, test_url)
1343
1345 parse = self.etree.parse
1346 parser = self.etree.XMLParser(dtd_validation=True)
1347 assertEqual = self.assertEqual
1348 test_url = _str("__nosuch.dtd")
1349
class MyResolver(self.etree.Resolver):
    """Resolver that answers the expected URL with a file-like DTD source."""
    def resolve(self, url, id, context):
        # The requested URL must equal ``test_url`` from the surrounding scope.
        assertEqual(url, test_url)
        # The DTD text is wrapped in ``SillyFileLike`` and handed over via
        # ``resolve_file`` instead of ``resolve_string``.
        return self.resolve_file(
            SillyFileLike(
                _str('''<!ENTITY myentity "%s">
<!ELEMENT doc ANY>''') % url), context)
1357
1358 parser.resolvers.add(MyResolver())
1359
1360 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1361 tree = parse(StringIO(xml), parser)
1362 root = tree.getroot()
1363 self.assertEqual(root.text, test_url)
1364
1366 parse = self.etree.parse
1367 parser = self.etree.XMLParser(attribute_defaults=True)
1368 assertEqual = self.assertEqual
1369 test_url = _str("__nosuch.dtd")
1370
class MyResolver(self.etree.Resolver):
    """Resolver that redirects the expected URL to a DTD file by name."""

    def resolve(self, url, id, context):
        """Check the requested URL and resolve it to ``test.dtd`` by path."""
        # The requested URL must equal ``test_url`` from the surrounding scope.
        assertEqual(url, test_url)
        dtd_path = fileInTestDir('test.dtd')
        return self.resolve_filename(dtd_path, context)
1376
1377 parser.resolvers.add(MyResolver())
1378
1379 xml = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
1380 tree = parse(StringIO(xml), parser)
1381 root = tree.getroot()
1382 self.assertEqual(
1383 root.attrib, {'default': 'valueA'})
1384 self.assertEqual(
1385 root[0].attrib, {'default': 'valueB'})
1386
1401
1402 parser.resolvers.add(MyResolver())
1403
1404 xml = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
1405 tree = parse(StringIO(xml), parser,
1406 base_url=fileUrlInTestDir('__test.xml'))
1407 root = tree.getroot()
1408 self.assertEqual(
1409 root.attrib, {'default': 'valueA'})
1410 self.assertEqual(
1411 root[0].attrib, {'default': 'valueB'})
1412
1414 parse = self.etree.parse
1415 parser = self.etree.XMLParser(attribute_defaults=True)
1416 assertEqual = self.assertEqual
1417 test_url = _str("__nosuch.dtd")
1418
class MyResolver(self.etree.Resolver):
    """Resolver that redirects the expected URL to an opened DTD file."""

    def resolve(self, url, id, context):
        """Check the requested URL and resolve it to an open ``test.dtd``."""
        # The requested URL must equal ``test_url`` from the surrounding scope.
        assertEqual(url, test_url)
        # NOTE(review): the handle is passed on still open and is never closed
        # here; presumably ``resolve_file`` or the parser takes care of it -
        # confirm against the lxml version in use.
        dtd_file = open(fileInTestDir('test.dtd'), 'rb')
        return self.resolve_file(dtd_file, context)
1424
1425 parser.resolvers.add(MyResolver())
1426
1427 xml = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
1428 tree = parse(StringIO(xml), parser)
1429 root = tree.getroot()
1430 self.assertEqual(
1431 root.attrib, {'default': 'valueA'})
1432 self.assertEqual(
1433 root[0].attrib, {'default': 'valueB'})
1434
1436 parse = self.etree.parse
1437 parser = self.etree.XMLParser(load_dtd=True)
1438 assertEqual = self.assertEqual
1439 test_url = _str("__nosuch.dtd")
1440
class check(object):
    """Holder for a class-level ``resolved`` flag."""

    # Starts out False.
    resolved = False
1443
class MyResolver(self.etree.Resolver):
    """Resolver that records being called and answers with an empty source."""

    def resolve(self, url, id, context):
        """Check the URL, set ``check.resolved`` and return an empty result."""
        # The requested URL must equal ``test_url`` from the surrounding scope.
        assertEqual(url, test_url)
        # Set the flag before producing the result.
        setattr(check, 'resolved', True)
        empty_source = self.resolve_empty(context)
        return empty_source
1449
1450 parser.resolvers.add(MyResolver())
1451
1452 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1453 self.assertRaises(etree.XMLSyntaxError, parse, StringIO(xml), parser)
1454 self.assertTrue(check.resolved)
1455
1462
class MyResolver(self.etree.Resolver):
    """Resolver whose ``resolve`` always fails with ``_LocalException``."""

    def resolve(self, url, id, context):
        """Raise ``_LocalException`` without looking at any argument."""
        raise _LocalException()
1466
1467 parser.resolvers.add(MyResolver())
1468
1469 xml = '<!DOCTYPE doc SYSTEM "test"><doc>&myentity;</doc>'
1470 self.assertRaises(_LocalException, parse, BytesIO(xml), parser)
1471
1472 if etree.LIBXML_VERSION > (2,6,20):
1489
1491 xml = _bytes('''<!DOCTYPE root [ <!ENTITY nbsp " "> ]>
1492 <root>
1493 <child1/>
1494 <child2/>
1495 <child3> </child3>
1496 </root>''')
1497
1498 parser = self.etree.XMLParser(resolve_entities=False)
1499 root = etree.fromstring(xml, parser)
1500 self.assertEqual([ el.tag for el in root ],
1501 ['child1', 'child2', 'child3'])
1502
1503 root[0] = root[-1]
1504 self.assertEqual([ el.tag for el in root ],
1505 ['child3', 'child2'])
1506 self.assertEqual(root[0][0].text, ' ')
1507 self.assertEqual(root[0][0].name, 'nbsp')
1508
1524
1531
1533 Entity = self.etree.Entity
1534 self.assertRaises(ValueError, Entity, 'a b c')
1535 self.assertRaises(ValueError, Entity, 'a,b')
1536 self.assertRaises(ValueError, Entity, 'a\0b')
1537 self.assertRaises(ValueError, Entity, '#abc')
1538 self.assertRaises(ValueError, Entity, '#xxyz')
1539
1552
1573
1586
1598
1607
1616
1617
1627
1636
1638 Element = self.etree.Element
1639 SubElement = self.etree.SubElement
1640 root = Element('root')
1641 self.assertRaises(ValueError, root.append, root)
1642 child = SubElement(root, 'child')
1643 self.assertRaises(ValueError, child.append, root)
1644 child2 = SubElement(child, 'child2')
1645 self.assertRaises(ValueError, child2.append, root)
1646 self.assertRaises(ValueError, child2.append, child)
1647 self.assertEqual('child2', root[0][0].tag)
1648
1661
1674
1685
1696
1706
1716
1732
1748
1754
1769
1782
1797
1810
1825
1838
1853
1866
1867
1875
1876
1886
1887
1902
1903
1913
1914
1925
1952
1953
1955 self.assertRaises(TypeError, self.etree.dump, None)
1956
1969
1982
2003
2012
2014 XML = self.etree.XML
2015
2016 root = XML(_bytes('<doc><one/><two>Two</two>Hm<three/></doc>'))
2017 result = []
2018 for el in root.iterchildren(reversed=True):
2019 result.append(el.tag)
2020 self.assertEqual(['three', 'two', 'one'], result)
2021
2030
2039
2048
2050 XML = self.etree.XML
2051
2052 root = XML(_bytes('<doc><one/><two>Two</two>Hm<two>Bla</two><three/></doc>'))
2053 result = []
2054 for el in root.iterchildren(tag=['two', 'three']):
2055 result.append(el.text)
2056 self.assertEqual(['Two', 'Bla', None], result)
2057
2059 XML = self.etree.XML
2060
2061 root = XML(_bytes('<doc><one/><two>Two</two>Hm<two>Bla</two><three/></doc>'))
2062 result = []
2063 for el in root.iterchildren('two', 'three'):
2064 result.append(el.text)
2065 self.assertEqual(['Two', 'Bla', None], result)
2066
2068 XML = self.etree.XML
2069
2070 root = XML(_bytes('<doc><one/><two>Two</two>Hm<two>Bla</two><three/></doc>'))
2071 result = []
2072 for el in root.iterchildren(reversed=True, tag=['two', 'three']):
2073 result.append(el.text)
2074 self.assertEqual([None, 'Bla', 'Two'], result)
2075
2096
2118
2120 Element = self.etree.Element
2121 SubElement = self.etree.SubElement
2122
2123 a = Element('a')
2124 b = SubElement(a, 'b')
2125 c = SubElement(a, 'c')
2126 d = SubElement(b, 'd')
2127 self.assertEqual(
2128 [b, a],
2129 list(d.iterancestors(tag=('a', 'b'))))
2130 self.assertEqual(
2131 [b, a],
2132 list(d.iterancestors('a', 'b')))
2133
2134 self.assertEqual(
2135 [],
2136 list(d.iterancestors(tag=('w', 'x', 'y', 'z'))))
2137 self.assertEqual(
2138 [],
2139 list(d.iterancestors('w', 'x', 'y', 'z')))
2140
2141 self.assertEqual(
2142 [],
2143 list(d.iterancestors(tag=('d', 'x'))))
2144 self.assertEqual(
2145 [],
2146 list(d.iterancestors('d', 'x')))
2147
2148 self.assertEqual(
2149 [b, a],
2150 list(d.iterancestors(tag=('b', '*'))))
2151 self.assertEqual(
2152 [b, a],
2153 list(d.iterancestors('b', '*')))
2154
2155 self.assertEqual(
2156 [b],
2157 list(d.iterancestors(tag=('b', 'c'))))
2158 self.assertEqual(
2159 [b],
2160 list(d.iterancestors('b', 'c')))
2161
2178
2180 Element = self.etree.Element
2181 SubElement = self.etree.SubElement
2182
2183 a = Element('a')
2184 b = SubElement(a, 'b')
2185 c = SubElement(a, 'c')
2186 d = SubElement(b, 'd')
2187 e = SubElement(c, 'e')
2188
2189 self.assertEqual(
2190 [],
2191 list(a.iterdescendants('a')))
2192 self.assertEqual(
2193 [],
2194 list(a.iterdescendants(tag='a')))
2195
2196 a2 = SubElement(e, 'a')
2197 self.assertEqual(
2198 [a2],
2199 list(a.iterdescendants('a')))
2200
2201 self.assertEqual(
2202 [a2],
2203 list(c.iterdescendants('a')))
2204 self.assertEqual(
2205 [a2],
2206 list(c.iterdescendants(tag='a')))
2207
2209 Element = self.etree.Element
2210 SubElement = self.etree.SubElement
2211
2212 a = Element('a')
2213 b = SubElement(a, 'b')
2214 c = SubElement(a, 'c')
2215 d = SubElement(b, 'd')
2216 e = SubElement(c, 'e')
2217
2218 self.assertEqual(
2219 [b, e],
2220 list(a.iterdescendants(tag=('a', 'b', 'e'))))
2221 self.assertEqual(
2222 [b, e],
2223 list(a.iterdescendants('a', 'b', 'e')))
2224
2225 a2 = SubElement(e, 'a')
2226 self.assertEqual(
2227 [b, a2],
2228 list(a.iterdescendants(tag=('a', 'b'))))
2229 self.assertEqual(
2230 [b, a2],
2231 list(a.iterdescendants('a', 'b')))
2232
2233 self.assertEqual(
2234 [],
2235 list(c.iterdescendants(tag=('x', 'y', 'z'))))
2236 self.assertEqual(
2237 [],
2238 list(c.iterdescendants('x', 'y', 'z')))
2239
2240 self.assertEqual(
2241 [b, d, c, e, a2],
2242 list(a.iterdescendants(tag=('x', 'y', 'z', '*'))))
2243 self.assertEqual(
2244 [b, d, c, e, a2],
2245 list(a.iterdescendants('x', 'y', 'z', '*')))
2246
2264
2281
2299
2323
2325 Element = self.etree.Element
2326 SubElement = self.etree.SubElement
2327
2328 a = Element('a')
2329 b = SubElement(a, 'b')
2330 c = SubElement(a, 'c')
2331 d = SubElement(b, 'd')
2332 self.assertEqual(
2333 [],
2334 list(a.itersiblings(tag='XXX')))
2335 self.assertEqual(
2336 [c],
2337 list(b.itersiblings(tag='c')))
2338 self.assertEqual(
2339 [c],
2340 list(b.itersiblings(tag='*')))
2341 self.assertEqual(
2342 [b],
2343 list(c.itersiblings(preceding=True, tag='b')))
2344 self.assertEqual(
2345 [],
2346 list(c.itersiblings(preceding=True, tag='c')))
2347
2349 Element = self.etree.Element
2350 SubElement = self.etree.SubElement
2351
2352 a = Element('a')
2353 b = SubElement(a, 'b')
2354 c = SubElement(a, 'c')
2355 d = SubElement(b, 'd')
2356 e = SubElement(a, 'e')
2357 self.assertEqual(
2358 [],
2359 list(a.itersiblings(tag=('XXX', 'YYY'))))
2360 self.assertEqual(
2361 [c, e],
2362 list(b.itersiblings(tag=('c', 'd', 'e'))))
2363 self.assertEqual(
2364 [b],
2365 list(c.itersiblings(preceding=True, tag=('b', 'b', 'c', 'd'))))
2366 self.assertEqual(
2367 [c, b],
2368 list(e.itersiblings(preceding=True, tag=('c', '*'))))
2369
2371 parseid = self.etree.parseid
2372 XML = self.etree.XML
2373 xml_text = _bytes('''
2374 <!DOCTYPE document [
2375 <!ELEMENT document (h1,p)*>
2376 <!ELEMENT h1 (#PCDATA)>
2377 <!ATTLIST h1 myid ID #REQUIRED>
2378 <!ELEMENT p (#PCDATA)>
2379 <!ATTLIST p someid ID #REQUIRED>
2380 ]>
2381 <document>
2382 <h1 myid="chapter1">...</h1>
2383 <p id="note1" class="note">...</p>
2384 <p>Regular paragraph.</p>
2385 <p xml:id="xmlid">XML:ID paragraph.</p>
2386 <p someid="warn1" class="warning">...</p>
2387 </document>
2388 ''')
2389
2390 tree, dic = parseid(BytesIO(xml_text))
2391 root = tree.getroot()
2392 root2 = XML(xml_text)
2393 self.assertEqual(self._writeElement(root),
2394 self._writeElement(root2))
2395 expected = {
2396 "chapter1" : root[0],
2397 "xmlid" : root[3],
2398 "warn1" : root[4]
2399 }
2400 self.assertTrue("chapter1" in dic)
2401 self.assertTrue("warn1" in dic)
2402 self.assertTrue("xmlid" in dic)
2403 self._checkIDDict(dic, expected)
2404
2406 XMLDTDID = self.etree.XMLDTDID
2407 XML = self.etree.XML
2408 xml_text = _bytes('''
2409 <!DOCTYPE document [
2410 <!ELEMENT document (h1,p)*>
2411 <!ELEMENT h1 (#PCDATA)>
2412 <!ATTLIST h1 myid ID #REQUIRED>
2413 <!ELEMENT p (#PCDATA)>
2414 <!ATTLIST p someid ID #REQUIRED>
2415 ]>
2416 <document>
2417 <h1 myid="chapter1">...</h1>
2418 <p id="note1" class="note">...</p>
2419 <p>Regular paragraph.</p>
2420 <p xml:id="xmlid">XML:ID paragraph.</p>
2421 <p someid="warn1" class="warning">...</p>
2422 </document>
2423 ''')
2424
2425 root, dic = XMLDTDID(xml_text)
2426 root2 = XML(xml_text)
2427 self.assertEqual(self._writeElement(root),
2428 self._writeElement(root2))
2429 expected = {
2430 "chapter1" : root[0],
2431 "xmlid" : root[3],
2432 "warn1" : root[4]
2433 }
2434 self.assertTrue("chapter1" in dic)
2435 self.assertTrue("warn1" in dic)
2436 self.assertTrue("xmlid" in dic)
2437 self._checkIDDict(dic, expected)
2438
2440 XMLDTDID = self.etree.XMLDTDID
2441 XML = self.etree.XML
2442 xml_text = _bytes('''
2443 <document>
2444 <h1 myid="chapter1">...</h1>
2445 <p id="note1" class="note">...</p>
2446 <p>Regular paragraph.</p>
2447 <p someid="warn1" class="warning">...</p>
2448 </document>
2449 ''')
2450
2451 root, dic = XMLDTDID(xml_text)
2452 root2 = XML(xml_text)
2453 self.assertEqual(self._writeElement(root),
2454 self._writeElement(root2))
2455 expected = {}
2456 self._checkIDDict(dic, expected)
2457
2459 XMLDTDID = self.etree.XMLDTDID
2460 XML = self.etree.XML
2461 xml_text = _bytes('''
2462 <!DOCTYPE document [
2463 <!ELEMENT document (h1,p)*>
2464 <!ELEMENT h1 (#PCDATA)>
2465 <!ATTLIST h1 myid ID #REQUIRED>
2466 <!ELEMENT p (#PCDATA)>
2467 <!ATTLIST p someid ID #REQUIRED>
2468 ]>
2469 <document>
2470 <h1 myid="chapter1">...</h1>
2471 <p id="note1" class="note">...</p>
2472 <p>Regular paragraph.</p>
2473 <p xml:id="xmlid">XML:ID paragraph.</p>
2474 <p someid="warn1" class="warning">...</p>
2475 </document>
2476 ''')
2477
2478 parser = etree.XMLParser(collect_ids=False)
2479 root, dic = XMLDTDID(xml_text, parser=parser)
2480 root2 = XML(xml_text)
2481 self.assertEqual(self._writeElement(root),
2482 self._writeElement(root2))
2483 self.assertFalse(dic)
2484 self._checkIDDict(dic, {})
2485
2487 self.assertEqual(len(dic),
2488 len(expected))
2489 self.assertEqual(sorted(dic.items()),
2490 sorted(expected.items()))
2491 if sys.version_info < (3,):
2492 self.assertEqual(sorted(dic.iteritems()),
2493 sorted(expected.iteritems()))
2494 self.assertEqual(sorted(dic.keys()),
2495 sorted(expected.keys()))
2496 if sys.version_info < (3,):
2497 self.assertEqual(sorted(dic.iterkeys()),
2498 sorted(expected.iterkeys()))
2499 if sys.version_info < (3,):
2500 self.assertEqual(sorted(dic.values()),
2501 sorted(expected.values()))
2502 self.assertEqual(sorted(dic.itervalues()),
2503 sorted(expected.itervalues()))
2504
2506 etree = self.etree
2507
2508 r = {'foo': 'http://ns.infrae.com/foo'}
2509 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2510 self.assertEqual(
2511 'foo',
2512 e.prefix)
2513 self.assertEqual(
2514 _bytes('<foo:bar xmlns:foo="http://ns.infrae.com/foo"></foo:bar>'),
2515 self._writeElement(e))
2516
2518 etree = self.etree
2519
2520 r = {None: 'http://ns.infrae.com/foo'}
2521 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2522 self.assertEqual(
2523 None,
2524 e.prefix)
2525 self.assertEqual(
2526 '{http://ns.infrae.com/foo}bar',
2527 e.tag)
2528 self.assertEqual(
2529 _bytes('<bar xmlns="http://ns.infrae.com/foo"></bar>'),
2530 self._writeElement(e))
2531
2533 etree = self.etree
2534
2535 r = {None: 'http://ns.infrae.com/foo', 'p': 'http://test/'}
2536 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2537 self.assertEqual(None, e.prefix)
2538 self.assertEqual('{http://ns.infrae.com/foo}bar', e.tag)
2539 self.assertEqual(
2540 _bytes('<bar xmlns="http://ns.infrae.com/foo" xmlns:p="http://test/"></bar>'),
2541 self._writeElement(e))
2542
2544 etree = self.etree
2545
2546 r = {None: 'http://ns.infrae.com/foo',
2547 'hoi': 'http://ns.infrae.com/hoi'}
2548 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2549 e.set('{http://ns.infrae.com/hoi}test', 'value')
2550 self.assertEqual(
2551 _bytes('<bar xmlns="http://ns.infrae.com/foo" xmlns:hoi="http://ns.infrae.com/hoi" hoi:test="value"></bar>'),
2552 self._writeElement(e))
2553
2555 etree = self.etree
2556
2557 root = etree.Element('{http://test/ns}root',
2558 nsmap={None: 'http://test/ns'})
2559 sub = etree.Element('{http://test/ns}sub',
2560 nsmap={'test': 'http://test/ns'})
2561
2562 sub.attrib['{http://test/ns}attr'] = 'value'
2563 self.assertEqual(sub.attrib['{http://test/ns}attr'], 'value')
2564 self.assertEqual(
2565 _bytes('<test:sub xmlns:test="http://test/ns" test:attr="value"/>'),
2566 etree.tostring(sub))
2567
2568 root.append(sub)
2569 self.assertEqual(
2570 _bytes('<root xmlns="http://test/ns">'
2571 '<sub xmlns:test="http://test/ns" test:attr="value"/>'
2572 '</root>'),
2573 etree.tostring(root))
2574
2576 etree = self.etree
2577
2578 root = etree.Element('root')
2579 sub = etree.Element('{http://test/ns}sub',
2580 nsmap={'test': 'http://test/ns'})
2581
2582 sub.attrib['{http://test/ns}attr'] = 'value'
2583 self.assertEqual(sub.attrib['{http://test/ns}attr'], 'value')
2584 self.assertEqual(
2585 _bytes('<test:sub xmlns:test="http://test/ns" test:attr="value"/>'),
2586 etree.tostring(sub))
2587
2588 root.append(sub)
2589 self.assertEqual(
2590 _bytes('<root>'
2591 '<test:sub xmlns:test="http://test/ns" test:attr="value"/>'
2592 '</root>'),
2593 etree.tostring(root))
2594
2596 etree = self.etree
2597
2598 root = etree.Element('root')
2599 sub = etree.Element('{http://test/ns}sub',
2600 nsmap={None: 'http://test/ns'})
2601
2602 sub.attrib['{http://test/ns}attr'] = 'value'
2603 self.assertEqual(sub.attrib['{http://test/ns}attr'], 'value')
2604 self.assertEqual(
2605 _bytes('<sub xmlns="http://test/ns" '
2606 'xmlns:ns0="http://test/ns" ns0:attr="value"/>'),
2607 etree.tostring(sub))
2608
2609 root.append(sub)
2610 self.assertEqual(
2611 _bytes('<root>'
2612 '<sub xmlns="http://test/ns"'
2613 ' xmlns:ns0="http://test/ns" ns0:attr="value"/>'
2614 '</root>'),
2615 etree.tostring(root))
2616
2618 etree = self.etree
2619
2620 root = etree.Element('{http://test/ns}root',
2621 nsmap={'test': 'http://test/ns',
2622 None: 'http://test/ns'})
2623 sub = etree.Element('{http://test/ns}sub',
2624 nsmap={None: 'http://test/ns'})
2625
2626 sub.attrib['{http://test/ns}attr'] = 'value'
2627 self.assertEqual(sub.attrib['{http://test/ns}attr'], 'value')
2628 self.assertEqual(
2629 _bytes('<sub xmlns="http://test/ns" '
2630 'xmlns:ns0="http://test/ns" ns0:attr="value"/>'),
2631 etree.tostring(sub))
2632
2633 root.append(sub)
2634 self.assertEqual(
2635 _bytes('<test:root xmlns:test="http://test/ns" xmlns="http://test/ns">'
2636 '<test:sub test:attr="value"/>'
2637 '</test:root>'),
2638 etree.tostring(root))
2639
2641 etree = self.etree
2642 r = {None: 'http://ns.infrae.com/foo',
2643 'hoi': 'http://ns.infrae.com/hoi'}
2644 e = etree.Element('{http://ns.infrae.com/foo}z', nsmap=r)
2645 tree = etree.ElementTree(element=e)
2646 etree.SubElement(e, '{http://ns.infrae.com/hoi}x')
2647 self.assertEqual(
2648 _bytes('<z xmlns="http://ns.infrae.com/foo" xmlns:hoi="http://ns.infrae.com/hoi"><hoi:x></hoi:x></z>'),
2649 self._writeElement(e))
2650
2652 etree = self.etree
2653
2654 r = {None: 'http://ns.infrae.com/foo'}
2655 e1 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2656 e2 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2657
2658 e1.append(e2)
2659
2660 self.assertEqual(
2661 None,
2662 e1.prefix)
2663 self.assertEqual(
2664 None,
2665 e1[0].prefix)
2666 self.assertEqual(
2667 '{http://ns.infrae.com/foo}bar',
2668 e1.tag)
2669 self.assertEqual(
2670 '{http://ns.infrae.com/foo}bar',
2671 e1[0].tag)
2672
2674 etree = self.etree
2675
2676 r = {None: 'http://ns.infrae.com/BAR'}
2677 e1 = etree.Element('{http://ns.infrae.com/BAR}bar', nsmap=r)
2678 e2 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2679
2680 e1.append(e2)
2681
2682 self.assertEqual(
2683 None,
2684 e1.prefix)
2685 self.assertNotEqual(
2686 None,
2687 e2.prefix)
2688 self.assertEqual(
2689 '{http://ns.infrae.com/BAR}bar',
2690 e1.tag)
2691 self.assertEqual(
2692 '{http://ns.infrae.com/foo}bar',
2693 e2.tag)
2694
2696 ns_href = "http://a.b.c"
2697 one = self.etree.fromstring(
2698 _bytes('<foo><bar xmlns:ns="%s"><ns:baz/></bar></foo>' % ns_href))
2699 baz = one[0][0]
2700
2701 two = self.etree.fromstring(
2702 _bytes('<root xmlns:ns="%s"/>' % ns_href))
2703 two.append(baz)
2704 del one
2705
2706 self.assertEqual('{%s}baz' % ns_href, baz.tag)
2707 self.assertEqual(
2708 _bytes('<root xmlns:ns="%s"><ns:baz/></root>' % ns_href),
2709 self.etree.tostring(two))
2710
2724
2741
2752
2754 xml = ('<root>' +
2755 ''.join('<a xmlns:n{n}="NS{n}">'.format(n=i) for i in range(100)) +
2756 '<n64:x/>' + '</a>'*100 + '</root>').encode('utf8')
2757 root = self.etree.fromstring(xml)
2758 self.assertEqual(xml, self.etree.tostring(root))
2759 self.etree.cleanup_namespaces(root)
2760 self.assertEqual(
2761 b'<root>' + b'<a>'*64 + b'<a xmlns:n64="NS64">' + b'<a>'*35 +
2762 b'<n64:x/>' + b'</a>'*100 + b'</root>',
2763 self.etree.tostring(root))
2764
2766 xml = ('<root>' +
2767 ''.join('<a xmlns:n{n}="NS{n}">'.format(n=i) for i in range(100)) +
2768 '<n64:x xmlns:a="A" a:attr="X"/>' +
2769 '</a>'*100 +
2770 '</root>').encode('utf8')
2771 root = self.etree.fromstring(xml)
2772 self.assertEqual(xml, self.etree.tostring(root))
2773 self.etree.cleanup_namespaces(root, top_nsmap={'n64': 'NS64'})
2774 self.assertEqual(
2775 b'<root xmlns:n64="NS64">' + b'<a>'*100 +
2776 b'<n64:x xmlns:a="A" a:attr="X"/>' + b'</a>'*100 + b'</root>',
2777 self.etree.tostring(root))
2778
2780 xml = ('<root xmlns:n64="NS64" xmlns:foo="FOO" xmlns:unused1="UNUSED" xmlns:no="NO">'
2781 '<a xmlns:unused2="UNUSED"><n64:x xmlns:a="A" a:attr="X"/></a>'
2782 '<foo>foo:bar</foo>'
2783 '</root>').encode('utf8')
2784 root = self.etree.fromstring(xml)
2785 self.assertEqual(xml, self.etree.tostring(root))
2786 self.etree.cleanup_namespaces(root, keep_ns_prefixes=['foo'])
2787 self.assertEqual(
2788 b'<root xmlns:n64="NS64" xmlns:foo="FOO">'
2789 b'<a><n64:x xmlns:a="A" a:attr="X"/></a>'
2790 b'<foo>foo:bar</foo>'
2791 b'</root>',
2792 self.etree.tostring(root))
2793
2795 xml = ('<root xmlns:n64="NS64" xmlns:unused1="UNUSED" xmlns:no="NO">'
2796 '<sub xmlns:foo="FOO">'
2797 '<a xmlns:unused2="UNUSED"><n64:x xmlns:a="A" a:attr="X"/></a>'
2798 '<foo>foo:bar</foo>'
2799 '</sub>'
2800 '</root>').encode('utf8')
2801 root = self.etree.fromstring(xml)
2802 self.assertEqual(xml, self.etree.tostring(root))
2803 self.etree.cleanup_namespaces(
2804 root,
2805 top_nsmap={'foo': 'FOO', 'unused1': 'UNUSED'},
2806 keep_ns_prefixes=['foo'])
2807 self.assertEqual(
2808 b'<root xmlns:n64="NS64" xmlns:foo="FOO">'
2809 b'<sub>'
2810 b'<a><n64:x xmlns:a="A" a:attr="X"/></a>'
2811 b'<foo>foo:bar</foo>'
2812 b'</sub>'
2813 b'</root>',
2814 self.etree.tostring(root))
2815
2817 etree = self.etree
2818
2819 r = {None: 'http://ns.infrae.com/foo',
2820 'hoi': 'http://ns.infrae.com/hoi'}
2821 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2822 self.assertEqual(
2823 r,
2824 e.nsmap)
2825
2827 etree = self.etree
2828
2829 re = {None: 'http://ns.infrae.com/foo',
2830 'hoi': 'http://ns.infrae.com/hoi'}
2831 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=re)
2832
2833 rs = {None: 'http://ns.infrae.com/honk',
2834 'top': 'http://ns.infrae.com/top'}
2835 s = etree.SubElement(e, '{http://ns.infrae.com/honk}bar', nsmap=rs)
2836
2837 r = re.copy()
2838 r.update(rs)
2839 self.assertEqual(re, e.nsmap)
2840 self.assertEqual(r, s.nsmap)
2841
2843 etree = self.etree
2844 el = etree.HTML('<hha:page-description>aa</hha:page-description>').find('.//page-description')
2845 self.assertEqual({'hha': None}, el.nsmap)
2846
2848 Element = self.etree.Element
2849 SubElement = self.etree.SubElement
2850
2851 a = Element('a')
2852 b = SubElement(a, 'b')
2853 c = SubElement(a, 'c')
2854 d = SubElement(b, 'd')
2855 e = SubElement(c, 'e')
2856 f = SubElement(c, 'f')
2857
2858 self.assertEqual(
2859 [a, b],
2860 list(a.getiterator('a', 'b')))
2861 self.assertEqual(
2862 [],
2863 list(a.getiterator('x', 'y')))
2864 self.assertEqual(
2865 [a, f],
2866 list(a.getiterator('f', 'a')))
2867 self.assertEqual(
2868 [c, e, f],
2869 list(c.getiterator('c', '*', 'a')))
2870 self.assertEqual(
2871 [],
2872 list(a.getiterator( (), () )))
2873
2875 Element = self.etree.Element
2876 SubElement = self.etree.SubElement
2877
2878 a = Element('a')
2879 b = SubElement(a, 'b')
2880 c = SubElement(a, 'c')
2881 d = SubElement(b, 'd')
2882 e = SubElement(c, 'e')
2883 f = SubElement(c, 'f')
2884
2885 self.assertEqual(
2886 [a, b],
2887 list(a.getiterator( ('a', 'b') )))
2888 self.assertEqual(
2889 [],
2890 list(a.getiterator( ('x', 'y') )))
2891 self.assertEqual(
2892 [a, f],
2893 list(a.getiterator( ('f', 'a') )))
2894 self.assertEqual(
2895 [c, e, f],
2896 list(c.getiterator( ('c', '*', 'a') )))
2897 self.assertEqual(
2898 [],
2899 list(a.getiterator( () )))
2900
2902 Element = self.etree.Element
2903 SubElement = self.etree.SubElement
2904
2905 a = Element('{a}a')
2906 b = SubElement(a, '{a}b')
2907 c = SubElement(a, '{a}c')
2908 d = SubElement(b, '{b}d')
2909 e = SubElement(c, '{a}e')
2910 f = SubElement(c, '{b}f')
2911 g = SubElement(c, 'g')
2912
2913 self.assertEqual(
2914 [a],
2915 list(a.getiterator('{a}a')))
2916 self.assertEqual(
2917 [],
2918 list(a.getiterator('{b}a')))
2919 self.assertEqual(
2920 [],
2921 list(a.getiterator('a')))
2922 self.assertEqual(
2923 [a,b,d,c,e,f,g],
2924 list(a.getiterator('*')))
2925 self.assertEqual(
2926 [f],
2927 list(c.getiterator('{b}*')))
2928 self.assertEqual(
2929 [d, f],
2930 list(a.getiterator('{b}*')))
2931 self.assertEqual(
2932 [g],
2933 list(a.getiterator('g')))
2934 self.assertEqual(
2935 [g],
2936 list(a.getiterator('{}g')))
2937 self.assertEqual(
2938 [g],
2939 list(a.getiterator('{}*')))
2940
2942 Element = self.etree.Element
2943 SubElement = self.etree.SubElement
2944
2945 a = Element('{a}a')
2946 b = SubElement(a, '{nsA}b')
2947 c = SubElement(b, '{nsB}b')
2948 d = SubElement(a, 'b')
2949 e = SubElement(a, '{nsA}e')
2950 f = SubElement(e, '{nsB}e')
2951 g = SubElement(e, 'e')
2952
2953 self.assertEqual(
2954 [b, c, d],
2955 list(a.getiterator('{*}b')))
2956 self.assertEqual(
2957 [e, f, g],
2958 list(a.getiterator('{*}e')))
2959 self.assertEqual(
2960 [a, b, c, d, e, f, g],
2961 list(a.getiterator('{*}*')))
2962
2987
3003
3020
3022 a = etree.Element("a")
3023 b = etree.SubElement(a, "b")
3024 c = etree.SubElement(a, "c")
3025 d1 = etree.SubElement(c, "d")
3026 d2 = etree.SubElement(c, "d")
3027 c.text = d1.text = 'TEXT'
3028
3029 tree = etree.ElementTree(a)
3030 self.assertEqual('.', tree.getelementpath(a))
3031 self.assertEqual('c/d[1]', tree.getelementpath(d1))
3032 self.assertEqual('c/d[2]', tree.getelementpath(d2))
3033
3034 self.assertEqual(d1, tree.find(tree.getelementpath(d1)))
3035 self.assertEqual(d2, tree.find(tree.getelementpath(d2)))
3036
3037 tree = etree.ElementTree(c)
3038 self.assertEqual('.', tree.getelementpath(c))
3039 self.assertEqual('d[2]', tree.getelementpath(d2))
3040 self.assertEqual(d2, tree.find(tree.getelementpath(d2)))
3041
3042 tree = etree.ElementTree(b)
3043 self.assertEqual('.', tree.getelementpath(b))
3044 self.assertRaises(ValueError, tree.getelementpath, a)
3045 self.assertRaises(ValueError, tree.getelementpath, c)
3046 self.assertRaises(ValueError, tree.getelementpath, d2)
3047
3049 a = etree.Element("{http://ns1/}a")
3050 b = etree.SubElement(a, "{http://ns1/}b")
3051 c = etree.SubElement(a, "{http://ns1/}c")
3052 d1 = etree.SubElement(c, "{http://ns1/}d")
3053 d2 = etree.SubElement(c, "{http://ns2/}d")
3054 d3 = etree.SubElement(c, "{http://ns1/}d")
3055
3056 tree = etree.ElementTree(a)
3057 self.assertEqual('.', tree.getelementpath(a))
3058 self.assertEqual('{http://ns1/}c/{http://ns1/}d[1]',
3059 tree.getelementpath(d1))
3060 self.assertEqual('{http://ns1/}c/{http://ns2/}d',
3061 tree.getelementpath(d2))
3062 self.assertEqual('{http://ns1/}c/{http://ns1/}d[2]',
3063 tree.getelementpath(d3))
3064
3065 self.assertEqual(a, tree.find(tree.getelementpath(a)))
3066 self.assertEqual(b, tree.find(tree.getelementpath(b)))
3067 self.assertEqual(c, tree.find(tree.getelementpath(c)))
3068 self.assertEqual(d1, tree.find(tree.getelementpath(d1)))
3069 self.assertEqual(d2, tree.find(tree.getelementpath(d2)))
3070 self.assertEqual(d3, tree.find(tree.getelementpath(d3)))
3071
3072 tree = etree.ElementTree(c)
3073 self.assertEqual('{http://ns1/}d[1]', tree.getelementpath(d1))
3074 self.assertEqual('{http://ns2/}d', tree.getelementpath(d2))
3075 self.assertEqual('{http://ns1/}d[2]', tree.getelementpath(d3))
3076 self.assertEqual(d1, tree.find(tree.getelementpath(d1)))
3077 self.assertEqual(d2, tree.find(tree.getelementpath(d2)))
3078 self.assertEqual(d3, tree.find(tree.getelementpath(d3)))
3079
3080 tree = etree.ElementTree(b)
3081 self.assertRaises(ValueError, tree.getelementpath, d1)
3082 self.assertRaises(ValueError, tree.getelementpath, d2)
3083
3090
3097
3106
3108 XML = self.etree.XML
3109 root = XML(_bytes('<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><b/></a>'))
3110 self.assertEqual(len(root.findall(".//{X}b")), 2)
3111 self.assertEqual(len(root.findall(".//{X}*")), 2)
3112 self.assertEqual(len(root.findall(".//b")), 3)
3113
3115 XML = self.etree.XML
3116 root = XML(_bytes('<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><y:b/></a>'))
3117 nsmap = {'xx': 'X'}
3118 self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 2)
3119 self.assertEqual(len(root.findall(".//xx:*", namespaces=nsmap)), 2)
3120 self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2)
3121 nsmap = {'xx': 'Y'}
3122 self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 1)
3123 self.assertEqual(len(root.findall(".//xx:*", namespaces=nsmap)), 1)
3124 self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2)
3125
3127 XML = self.etree.XML
3128 root = XML(_bytes('<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><y:b/></a>'))
3129 nsmap = {'xx': 'X'}
3130 self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 2)
3131 nsmap = {'xx': 'X', None: 'Y'}
3132 self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 1)
3133 nsmap = {'xx': 'X', '': 'Y'}
3134 self.assertRaises(ValueError, root.findall, ".//xx:b", namespaces=nsmap)
3135
3142
3144 etree = self.etree
3145 e = etree.Element('foo')
3146 for i in range(10):
3147 etree.SubElement(e, 'a%s' % i)
3148 for i in range(10):
3149 self.assertEqual(
3150 i,
3151 e.index(e[i]))
3152 self.assertEqual(
3153 3, e.index(e[3], 3))
3154 self.assertRaises(
3155 ValueError, e.index, e[3], 4)
3156 self.assertRaises(
3157 ValueError, e.index, e[3], 0, 2)
3158 self.assertRaises(
3159 ValueError, e.index, e[8], 0, -3)
3160 self.assertRaises(
3161 ValueError, e.index, e[8], -5, -3)
3162 self.assertEqual(
3163 8, e.index(e[8], 0, -1))
3164 self.assertEqual(
3165 8, e.index(e[8], -12, -1))
3166 self.assertEqual(
3167 0, e.index(e[0], -12, -1))
3168
3170 etree = self.etree
3171 e = etree.Element('foo')
3172 for i in range(10):
3173 el = etree.SubElement(e, 'a%s' % i)
3174 el.text = "text%d" % i
3175 el.tail = "tail%d" % i
3176
3177 child0 = e[0]
3178 child1 = e[1]
3179 child2 = e[2]
3180
3181 e.replace(e[0], e[1])
3182 self.assertEqual(
3183 9, len(e))
3184 self.assertEqual(
3185 child1, e[0])
3186 self.assertEqual(
3187 child1.text, "text1")
3188 self.assertEqual(
3189 child1.tail, "tail1")
3190 self.assertEqual(
3191 child0.tail, "tail0")
3192 self.assertEqual(
3193 child2, e[1])
3194
3195 e.replace(e[-1], e[0])
3196 self.assertEqual(
3197 child1, e[-1])
3198 self.assertEqual(
3199 child1.text, "text1")
3200 self.assertEqual(
3201 child1.tail, "tail1")
3202 self.assertEqual(
3203 child2, e[0])
3204
3206 etree = self.etree
3207 e = etree.Element('foo')
3208 for i in range(10):
3209 etree.SubElement(e, 'a%s' % i)
3210
3211 new_element = etree.Element("test")
3212 new_element.text = "TESTTEXT"
3213 new_element.tail = "TESTTAIL"
3214 child1 = e[1]
3215 e.replace(e[0], new_element)
3216 self.assertEqual(
3217 new_element, e[0])
3218 self.assertEqual(
3219 "TESTTEXT",
3220 e[0].text)
3221 self.assertEqual(
3222 "TESTTAIL",
3223 e[0].tail)
3224 self.assertEqual(
3225 child1, e[1])
3226
3242
3260
3278
3296
3298 Element = self.etree.Element
3299 SubElement = self.etree.SubElement
3300 try:
3301 slice
3302 except NameError:
3303 print("slice() not found")
3304 return
3305
3306 a = Element('a')
3307 b = SubElement(a, 'b')
3308 c = SubElement(a, 'c')
3309 d = SubElement(a, 'd')
3310 e = SubElement(a, 'e')
3311
3312 x = Element('x')
3313 y = Element('y')
3314 z = Element('z')
3315
3316 self.assertRaises(
3317 ValueError,
3318 operator.setitem, a, slice(1,None,2), [x, y, z])
3319
3320 self.assertEqual(
3321 [b, c, d, e],
3322 list(a))
3323
3336
3338 XML = self.etree.XML
3339 root = XML(_bytes(
3340 '<?xml version="1.0"?>\n'
3341 '<root>' + '\n' * 65536 +
3342 '<p>' + '\n' * 65536 + '</p>\n' +
3343 '<br/>\n'
3344 '</root>'))
3345
3346 if self.etree.LIBXML_VERSION >= (2, 9):
3347 expected = [2, 131074, 131076]
3348 else:
3349 expected = [2, 65535, 65535]
3350
3351 self.assertEqual(expected, [el.sourceline for el in root.iter()])
3352
3360
3369
3379
3389
3395
3403
3409
3416
3422
3424 etree = self.etree
3425 xml_header = '<?xml version="1.0" encoding="ascii"?>'
3426 pub_id = "-//W3C//DTD XHTML 1.0 Transitional//EN"
3427 sys_id = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
3428 doctype_string = '<!DOCTYPE html PUBLIC "%s" "%s">' % (pub_id, sys_id)
3429
3430 xml = _bytes(xml_header + doctype_string + '<html><body></body></html>')
3431
3432 tree = etree.parse(BytesIO(xml))
3433 docinfo = tree.docinfo
3434 self.assertEqual(docinfo.encoding, "ascii")
3435 self.assertEqual(docinfo.xml_version, "1.0")
3436 self.assertEqual(docinfo.public_id, pub_id)
3437 self.assertEqual(docinfo.system_url, sys_id)
3438 self.assertEqual(docinfo.root_name, 'html')
3439 self.assertEqual(docinfo.doctype, doctype_string)
3440
3456
3468
3480
3486
3488 etree = self.etree
3489 pub_id = "-//W3C//DTD XHTML 1.0 Transitional//EN"
3490 sys_id = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
3491 doctype_string = _bytes('<!DOCTYPE html PUBLIC "%s" "%s">' % (pub_id, sys_id))
3492
3493 xml = _bytes('<!DOCTYPE root>\n<root/>')
3494 tree = etree.parse(BytesIO(xml))
3495 self.assertEqual(xml.replace(_bytes('<!DOCTYPE root>'), doctype_string),
3496 etree.tostring(tree, doctype=doctype_string))
3497
3499 etree = self.etree
3500 root = etree.XML(_bytes("<root/>"), base_url="http://no/such/url")
3501 self.assertEqual(root.base, "http://no/such/url")
3502 self.assertEqual(
3503 root.get('{http://www.w3.org/XML/1998/namespace}base'), None)
3504 root.base = "https://secret/url"
3505 self.assertEqual(root.base, "https://secret/url")
3506 self.assertEqual(
3507 root.get('{http://www.w3.org/XML/1998/namespace}base'),
3508 "https://secret/url")
3509
3511 etree = self.etree
3512 root = etree.XML(_bytes("<root/>"), base_url="http://no/such/url")
3513 self.assertEqual(root.base, "http://no/such/url")
3514 self.assertEqual(
3515 root.get('{http://www.w3.org/XML/1998/namespace}base'), None)
3516 root.set('{http://www.w3.org/XML/1998/namespace}base',
3517 "https://secret/url")
3518 self.assertEqual(root.base, "https://secret/url")
3519 self.assertEqual(
3520 root.get('{http://www.w3.org/XML/1998/namespace}base'),
3521 "https://secret/url")
3522
3528
3533
3540
3554
3556 Element = self.etree.Element
3557
3558 a = Element('a')
3559 self.assertRaises(ValueError, setattr, a, "text", 'ha\0ho')
3560 self.assertRaises(ValueError, setattr, a, "tail", 'ha\0ho')
3561
3562 self.assertRaises(ValueError, Element, 'ha\0ho')
3563
3565 Element = self.etree.Element
3566
3567 a = Element('a')
3568 self.assertRaises(ValueError, setattr, a, "text",
3569 _str('ha\0ho'))
3570 self.assertRaises(ValueError, setattr, a, "tail",
3571 _str('ha\0ho'))
3572
3573 self.assertRaises(ValueError, Element,
3574 _str('ha\0ho'))
3575
3577 Element = self.etree.Element
3578
3579 a = Element('a')
3580 self.assertRaises(ValueError, setattr, a, "text", 'ha\x07ho')
3581 self.assertRaises(ValueError, setattr, a, "text", 'ha\x02ho')
3582
3583 self.assertRaises(ValueError, setattr, a, "tail", 'ha\x07ho')
3584 self.assertRaises(ValueError, setattr, a, "tail", 'ha\x02ho')
3585
3586 self.assertRaises(ValueError, Element, 'ha\x07ho')
3587 self.assertRaises(ValueError, Element, 'ha\x02ho')
3588
3590 Element = self.etree.Element
3591
3592 a = Element('a')
3593 self.assertRaises(ValueError, setattr, a, "text",
3594 _str('ha\x07ho'))
3595 self.assertRaises(ValueError, setattr, a, "text",
3596 _str('ha\x02ho'))
3597
3598 self.assertRaises(ValueError, setattr, a, "tail",
3599 _str('ha\x07ho'))
3600 self.assertRaises(ValueError, setattr, a, "tail",
3601 _str('ha\x02ho'))
3602
3603 self.assertRaises(ValueError, Element,
3604 _str('ha\x07ho'))
3605 self.assertRaises(ValueError, Element,
3606 _str('ha\x02ho'))
3607
3609 Element = self.etree.Element
3610
3611 a = Element('a')
3612 self.assertRaises(ValueError, setattr, a, "text",
3613 _str('ha\u1234\x07ho'))
3614 self.assertRaises(ValueError, setattr, a, "text",
3615 _str('ha\u1234\x02ho'))
3616
3617 self.assertRaises(ValueError, setattr, a, "tail",
3618 _str('ha\u1234\x07ho'))
3619 self.assertRaises(ValueError, setattr, a, "tail",
3620 _str('ha\u1234\x02ho'))
3621
3622 self.assertRaises(ValueError, Element,
3623 _str('ha\u1234\x07ho'))
3624 self.assertRaises(ValueError, Element,
3625 _str('ha\u1234\x02ho'))
3626
3640
3645
3663
3683
3685 tostring = self.etree.tostring
3686 html = self.etree.fromstring(
3687 '<html><body>'
3688 '<div><p>Some text<i>\r\n</i></p></div>\r\n'
3689 '</body></html>',
3690 parser=self.etree.HTMLParser())
3691 self.assertEqual(html.tag, 'html')
3692 div = html.find('.//div')
3693 self.assertEqual(div.tail, '\r\n')
3694 result = tostring(div, method='html')
3695 self.assertEqual(
3696 result,
3697 _bytes("<div><p>Some text<i>\r\n</i></p></div>\r\n"))
3698 result = tostring(div, method='html', with_tail=True)
3699 self.assertEqual(
3700 result,
3701 _bytes("<div><p>Some text<i>\r\n</i></p></div>\r\n"))
3702 result = tostring(div, method='html', with_tail=False)
3703 self.assertEqual(
3704 result,
3705 _bytes("<div><p>Some text<i>\r\n</i></p></div>"))
3706
3728
3730 tostring = self.etree.tostring
3731 XML = self.etree.XML
3732 ElementTree = self.etree.ElementTree
3733
3734 root = XML(_bytes("<root/>"))
3735
3736 tree = ElementTree(root)
3737 self.assertEqual(None, tree.docinfo.standalone)
3738
3739 result = tostring(root, xml_declaration=True, encoding="ASCII")
3740 self.assertEqual(result, _bytes(
3741 "<?xml version='1.0' encoding='ASCII'?>\n<root/>"))
3742
3743 result = tostring(root, xml_declaration=True, encoding="ASCII",
3744 standalone=True)
3745 self.assertEqual(result, _bytes(
3746 "<?xml version='1.0' encoding='ASCII' standalone='yes'?>\n<root/>"))
3747
3748 tree = ElementTree(XML(result))
3749 self.assertEqual(True, tree.docinfo.standalone)
3750
3751 result = tostring(root, xml_declaration=True, encoding="ASCII",
3752 standalone=False)
3753 self.assertEqual(result, _bytes(
3754 "<?xml version='1.0' encoding='ASCII' standalone='no'?>\n<root/>"))
3755
3756 tree = ElementTree(XML(result))
3757 self.assertEqual(False, tree.docinfo.standalone)
3758
3778
3780 tostring = self.etree.tostring
3781 Element = self.etree.Element
3782 SubElement = self.etree.SubElement
3783
3784 a = Element('a')
3785 a.text = "A"
3786 a.tail = "tail"
3787 b = SubElement(a, 'b')
3788 b.text = "B"
3789 b.tail = _str("Søk på nettet")
3790 c = SubElement(a, 'c')
3791 c.text = "C"
3792
3793 result = tostring(a, method="text", encoding="UTF-16")
3794
3795 self.assertEqual(_str('ABSøk på nettetCtail').encode("UTF-16"),
3796 result)
3797
3799 tostring = self.etree.tostring
3800 Element = self.etree.Element
3801 SubElement = self.etree.SubElement
3802
3803 a = Element('a')
3804 a.text = _str('Søk på nettetA')
3805 a.tail = "tail"
3806 b = SubElement(a, 'b')
3807 b.text = "B"
3808 b.tail = _str('Søk på nettetB')
3809 c = SubElement(a, 'c')
3810 c.text = "C"
3811
3812 self.assertRaises(UnicodeEncodeError,
3813 tostring, a, method="text")
3814
3815 self.assertEqual(
3816 _str('Søk på nettetABSøk på nettetBCtail').encode('utf-8'),
3817 tostring(a, encoding="UTF-8", method="text"))
3818
3831
3847
3851
3866
3884
3897
3899 tostring = self.etree.tostring
3900 Element = self.etree.Element
3901 SubElement = self.etree.SubElement
3902
3903 a = Element('a')
3904 b = SubElement(a, 'b')
3905 c = SubElement(a, 'c')
3906 d = SubElement(c, 'd')
3907 self.assertTrue(isinstance(tostring(b, encoding=_unicode), _unicode))
3908 self.assertTrue(isinstance(tostring(c, encoding=_unicode), _unicode))
3909 self.assertEqual(_bytes('<b></b>'),
3910 canonicalize(tostring(b, encoding=_unicode)))
3911 self.assertEqual(_bytes('<c><d></d></c>'),
3912 canonicalize(tostring(c, encoding=_unicode)))
3913
3918
3933
3935 tostring = self.etree.tostring
3936 Element = self.etree.Element
3937 SubElement = self.etree.SubElement
3938
3939 a = Element('a')
3940 b = SubElement(a, 'b')
3941 c = SubElement(a, 'c')
3942
3943 result = tostring(a, encoding=_unicode)
3944 self.assertEqual(result, "<a><b/><c/></a>")
3945
3946 result = tostring(a, encoding=_unicode, pretty_print=False)
3947 self.assertEqual(result, "<a><b/><c/></a>")
3948
3949 result = tostring(a, encoding=_unicode, pretty_print=True)
3950 self.assertEqual(result, "<a>\n <b/>\n <c/>\n</a>\n")
3951
3963
class SubEl(etree.ElementBase):
    """Bare ``ElementBase`` subclass that adds nothing of its own."""
3967
3968 el1 = SubEl()
3969 el2 = SubEl()
3970 self.assertEqual('SubEl', el1.tag)
3971 self.assertEqual('SubEl', el2.tag)
3972 el1.other = el2
3973 el2.other = el1
3974
3975 del el1, el2
3976 gc.collect()
3977
3978
3992
3994 root = etree.Element('parent')
3995 c1 = etree.SubElement(root, 'child1')
3996 c2 = etree.SubElement(root, 'child2')
3997
3998 root.remove(c1)
3999 root.remove(c2)
4000 c1.addnext(c2)
4001 c1.tail = 'abc'
4002 c2.tail = 'xyz'
4003 del c1
4004
4005 c2.getprevious()
4006
4007 self.assertEqual('child1', c2.getprevious().tag)
4008 self.assertEqual('abc', c2.getprevious().tail)
4009
4010
4011
4012 - def _writeElement(self, element, encoding='us-ascii', compression=0):
4023
4068
# NOTE(review): `res` is defined in lines not visible in this view;
# the comments below describe only what the visible statements do.
# Register a resolver instance on a parser that loads DTDs.
4069 res_instance = res()
4070 parser = etree.XMLParser(load_dtd = True)
4071 parser.resolvers.add(res_instance)
4072
4073 tree = etree.parse(fileInTestDir('include/test_xinclude.xml'),
4074 parser = parser)
4075
# Process the includes of the parsed tree via self.include().
4076 self.include(tree)
4077
# The resolver's `called` mapping must contain exactly these three keys,
# each mapped to True.
4078 called = list(res_instance.called.items())
4079 called.sort()
4080 self.assertEqual(
4081 [("dtd", True), ("include", True), ("input", True)],
4082 called)
4083
# Chained includes served by a custom resolver: every document handed out
# by the resolver includes the next numbered file, until 'test5.xml'
# ends the chain with a plain <DONE/> document.
4085 data = textwrap.dedent('''
4086 <doc xmlns:xi="http://www.w3.org/2001/XInclude">
4087 <foo/>
4088 <xi:include href="./test.xml" />
4089 </doc>
4090 ''')
4091
4092 class Resolver(etree.Resolver):
# Class-level dict recording which resources were requested; each key may
# be recorded only once (enforced by the asserts in resolve()).
4093 called = {}
4094
4095 def resolve(self, url, id, context):
# The top-level input file: record it and return None.
4096 if url.endswith("test_xinclude.xml"):
4097 assert not self.called.get("input")
4098 self.called["input"] = True
4099 return None
# End of the chain: answer with a document that has no further include.
4100 elif url.endswith('/test5.xml'):
4101 assert not self.called.get("DONE")
4102 self.called["DONE"] = True
4103 return self.resolve_string('<DONE/>', context)
# Any other URL: record its file name and answer with a copy of `data`
# whose href points to 'test<N>.xml', N being the number of entries
# recorded so far.
4104 else:
4105 _, filename = url.rsplit('/', 1)
4106 assert not self.called.get(filename)
4107 self.called[filename] = True
4108 next_data = data.replace(
4109 'test.xml', 'test%d.xml' % len(self.called))
4110 return self.resolve_string(next_data, context)
4111
4112 res_instance = Resolver()
4113 parser = etree.XMLParser(load_dtd=True)
4114 parser.resolvers.add(res_instance)
4115
4116 tree = etree.parse(fileInTestDir('include/test_xinclude.xml'),
4117 parser=parser)
4118
4119 self.include(tree)
4120
# Every link of the chain must have been requested exactly once.
4121 called = list(res_instance.called.items())
4122 called.sort()
4123 self.assertEqual(
4124 [("DONE", True), ("input", True), ("test.xml", True),
4125 ("test2.xml", True), ("test3.xml", True), ("test4.xml", True)],
4126 called)
4127
4132
4138
# write_c14n() to an in-memory file: the empty element <b/> must come out
# as an explicit start/end tag pair.
4142 tree = self.parse(_bytes('<a><b/></a>'))
4143 f = BytesIO()
4144 tree.write_c14n(f)
4145 s = f.getvalue()
4146 self.assertEqual(_bytes('<a><b></b></a>'),
4147 s)
4148
# write_c14n() with compression=9: the bytes written must be readable as
# a gzip stream that decompresses to the canonical serialisation.
4150 tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
4151 f = BytesIO()
4152 tree.write_c14n(f, compression=9)
# Decompress with the standard gzip module.
4153 with closing(gzip.GzipFile(fileobj=BytesIO(f.getvalue()))) as gzfile:
4154 s = gzfile.read()
4155 self.assertEqual(_bytes('<a>'+'<b></b>'*200+'</a>'),
4156 s)
4157
4165
4174
4192
4204
4216
# write_c14n() with the `exclusive` and `inclusive_ns_prefixes` options.
# The root declares a default namespace plus prefixes y and z; only z is
# used, by the child element.
4218 tree = self.parse(_bytes(
4219 '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))
# Default call: all three declarations stay on the root.
4220 f = BytesIO()
4221 tree.write_c14n(f)
4222 s = f.getvalue()
4223 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
4224 s)
# exclusive=False gives the same output as the default call.
4225 f = BytesIO()
4226 tree.write_c14n(f, exclusive=False)
4227 s = f.getvalue()
4228 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
4229 s)
# exclusive=True: the unused y declaration is dropped and z is declared
# on the element that uses it.
4230 f = BytesIO()
4231 tree.write_c14n(f, exclusive=True)
4232 s = f.getvalue()
4233 self.assertEqual(_bytes('<a xmlns="http://abc"><z:b xmlns:z="http://cde"></z:b></a>'),
4234 s)
4235
# Listing 'z' as inclusive prefix keeps its declaration on the root.
4236 f = BytesIO()
4237 tree.write_c14n(f, exclusive=True, inclusive_ns_prefixes=['z'])
4238 s = f.getvalue()
4239 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:z="http://cde"><z:b></z:b></a>'),
4240 s)
4241
# tostring(tree, method='c14n') with the `exclusive` and
# `inclusive_ns_prefixes` options, applied to a whole tree.
4243 tree = self.parse(_bytes(
4244 '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))
# Default call: all declarations stay on the root.
4245 s = etree.tostring(tree, method='c14n')
4246 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
4247 s)
# exclusive=False gives the same output as the default call.
4248 s = etree.tostring(tree, method='c14n', exclusive=False)
4249 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
4250 s)
# exclusive=True: unused y is dropped, z moves to the element using it.
4251 s = etree.tostring(tree, method='c14n', exclusive=True)
4252 self.assertEqual(_bytes('<a xmlns="http://abc"><z:b xmlns:z="http://cde"></z:b></a>'),
4253 s)
4254
# An inclusive prefix ('y') is kept on the root even though it is unused.
4255 s = etree.tostring(tree, method='c14n', exclusive=True, inclusive_ns_prefixes=['y'])
4256 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd"><z:b xmlns:z="http://cde"></z:b></a>'),
4257 s)
4258
# tostring(element, method='c14n') with the `exclusive` options, applied
# to the root element and to its child element.
4260 tree = self.parse(_bytes(
4261 '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))
# Root element: expected outputs match those of serialising the tree.
4262 s = etree.tostring(tree.getroot(), method='c14n')
4263 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
4264 s)
4265 s = etree.tostring(tree.getroot(), method='c14n', exclusive=False)
4266 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
4267 s)
4268 s = etree.tostring(tree.getroot(), method='c14n', exclusive=True)
4269 self.assertEqual(_bytes('<a xmlns="http://abc"><z:b xmlns:z="http://cde"></z:b></a>'),
4270 s)
4271
# Child element, non-exclusive: every declaration in scope is written on
# the serialised element.
4272 s = etree.tostring(tree.getroot()[0], method='c14n', exclusive=False)
4273 self.assertEqual(_bytes('<z:b xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"></z:b>'),
4274 s)
# Child element, exclusive: only the namespace it uses is declared.
4275 s = etree.tostring(tree.getroot()[0], method='c14n', exclusive=True)
4276 self.assertEqual(_bytes('<z:b xmlns:z="http://cde"></z:b>'),
4277 s)
4278
# Child element, exclusive with inclusive prefix 'y': y is declared too.
4279 s = etree.tostring(tree.getroot()[0], method='c14n', exclusive=True, inclusive_ns_prefixes=['y'])
4280 self.assertEqual(_bytes('<z:b xmlns:y="http://bcd" xmlns:z="http://cde"></z:b>'),
4281 s)
4282
4284 """ Regression test to fix memory allocation issues (use 3+ inclusive NS spaces)"""
# Three prefixes are declared on the root; only z is used by an element.
4285 tree = self.parse(_bytes(
4286 '<a xmlns:x="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))
4287
# With all three prefixes listed as inclusive, exclusive c14n must keep
# all three declarations on the root.
4288 s = etree.tostring(tree, method='c14n', exclusive=True, inclusive_ns_prefixes=['x', 'y', 'z'])
4289 self.assertEqual(_bytes('<a xmlns:x="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
4290 s)
4291
# Plain tree.write() to an in-memory file reproduces the parsed input.
4295 tree = self.parse(_bytes('<a><b/></a>'))
4296 f = BytesIO()
4297 tree.write(f)
4298 s = f.getvalue()
4299 self.assertEqual(_bytes('<a><b/></a>'),
4300 s)
4301
# tree.write() with a `doctype` argument: the given string is written
# as-is, followed by a newline, before the root element.
4303 tree = self.parse(_bytes('<a><b/></a>'))
4304 f = BytesIO()
4305 tree.write(f, doctype='HUHU')
4306 s = f.getvalue()
4307 self.assertEqual(_bytes('HUHU\n<a><b/></a>'),
4308 s)
4309
# tree.write() with compression=9: the bytes written must be readable as
# a gzip stream that decompresses to the normal serialisation.
4311 tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
4312 f = BytesIO()
4313 tree.write(f, compression=9)
# Decompress with the standard gzip module.
4314 with closing(gzip.GzipFile(fileobj=BytesIO(f.getvalue()))) as gzfile:
4315 s = gzfile.read()
4316 self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'),
4317 s)
4318
# tree.write() with both compression and doctype: the doctype line must
# be part of the compressed stream.
4320 tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
4321 f = BytesIO()
4322 tree.write(f, compression=9, doctype='<!DOCTYPE a>')
# Decompress with the standard gzip module.
4323 with closing(gzip.GzipFile(fileobj=BytesIO(f.getvalue()))) as gzfile:
4324 s = gzfile.read()
4325 self.assertEqual(_bytes('<!DOCTYPE a>\n<a>'+'<b/>'*200+'</a>'),
4326 s)
4327
# Compare tree.write() output for compression levels 0, 1 and 9.
4329 tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
# Level 0: output is kept as the uncompressed reference.
4330 f = BytesIO()
4331 tree.write(f, compression=0)
4332 s0 = f.getvalue()
4333
# Omitting the argument must give the same bytes as compression=0.
4334 f = BytesIO()
4335 tree.write(f)
4336 self.assertEqual(f.getvalue(), s0)
4337
# Level 1: output must not be larger than the uncompressed form and must
# be a readable gzip stream.
4338 f = BytesIO()
4339 tree.write(f, compression=1)
4340 s = f.getvalue()
4341 self.assertTrue(len(s) <= len(s0))
4342 with closing(gzip.GzipFile(fileobj=BytesIO(s))) as gzfile:
4343 s1 = gzfile.read()
4344
# Level 9: same checks as for level 1.
4345 f = BytesIO()
4346 tree.write(f, compression=9)
4347 s = f.getvalue()
4348 self.assertTrue(len(s) <= len(s0))
4349 with closing(gzip.GzipFile(fileobj=BytesIO(s))) as gzfile:
4350 s9 = gzfile.read()
4351
# All three variants must carry the same XML payload.
4352 self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'),
4353 s0)
4354 self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'),
4355 s1)
4356 self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'),
4357 s9)
4358
4366
4375
4383
4392
4395 etree = etree
4396
4418
4420 """This can't really be tested as long as there isn't a way to
4421 reset the logging setup ...
4422 """
4423 parse = self.etree.parse
4424
# Collect every message passed to the logger's log() method.
4425 messages = []
4426 class Logger(self.etree.PyErrorLog):
4427 def log(self, entry, message, *args):
4428 messages.append(message)
4429
# Install the collecting logger as the global Python log of lxml.
4430 self.etree.use_global_python_log(Logger())
# Malformed input: </c> does not match the open <b> element.
4431 f = BytesIO('<a><b></c></b></a>')
# The parse error itself is expected; only the logged messages matter.
4432 try:
4433 parse(f)
4434 except SyntaxError:
4435 pass
4436 f.close()
4437
# Each check passes when at least one collected message contains the
# given marker (a non-empty list is truthy).
4438 self.assertTrue([ message for message in messages
4439 if 'mismatch' in message ])
4440 self.assertTrue([ message for message in messages
4441 if ':PARSER:' in message])
4442 self.assertTrue([ message for message in messages
4443 if ':ERR_TAG_NAME_MISMATCH:' in message ])
# NOTE(review): ':1:15:' presumably encodes line 1, column 15 - confirm.
4444 self.assertTrue([ message for message in messages
4445 if ':1:15:' in message ])
4446
4449 etree = etree
4450
4454
# XMLPullParser with a custom parser target and no explicit event list.
# The target returns marker strings so the test can see which callback
# result ends up in each event.
4456 class Target(object):
4457 def start(self, tag, attrib):
4458 return 'start(%s)' % tag
4459 def end(self, tag):
4460 return 'end(%s)' % tag
4461 def close(self):
4462 return 'close()'
4463
4464 parser = self.etree.XMLPullParser(target=Target())
4465 events = parser.read_events()
4466
# Only start tags were fed so far: no events are reported, and asking
# again still yields nothing.
4467 parser.feed('<root><element>')
4468 self.assertFalse(list(events))
4469 self.assertFalse(list(events))
# Each closing tag yields one 'end' event whose payload is the value
# returned by Target.end(); the <child> start tag yields no event.
4470 parser.feed('</element><child>')
4471 self.assertEqual([('end', 'end(element)')], list(events))
4472 parser.feed('</child>')
4473 self.assertEqual([('end', 'end(child)')], list(events))
4474 parser.feed('</root>')
4475 self.assertEqual([('end', 'end(root)')], list(events))
4476 self.assertFalse(list(events))
# parser.close() returns what Target.close() returned.
4477 self.assertEqual('close()', parser.close())
4478
# XMLPullParser with a custom parser target and both 'start' and 'end'
# events requested. The target returns marker strings so the test can see
# which callback result ends up in each event.
4480 class Target(object):
4481 def start(self, tag, attrib):
4482 return 'start(%s)' % tag
4483 def end(self, tag):
4484 return 'end(%s)' % tag
4485 def close(self):
4486 return 'close()'
4487
4488 parser = self.etree.XMLPullParser(
4489 ['start', 'end'], target=Target())
4490 events = parser.read_events()
4491
# Start tags yield 'start' events carrying the value from Target.start().
4492 parser.feed('<root><element>')
4493 self.assertEqual(
4494 [('start', 'start(root)'), ('start', 'start(element)')],
4495 list(events))
# Already consumed events are not reported a second time.
4496 self.assertFalse(list(events))
# Events are reported in document order across 'end' and 'start'.
4497 parser.feed('</element><child>')
4498 self.assertEqual(
4499 [('end', 'end(element)'), ('start', 'start(child)')],
4500 list(events))
4501 parser.feed('</child>')
4502 self.assertEqual(
4503 [('end', 'end(child)')],
4504 list(events))
4505 parser.feed('</root>')
4506 self.assertEqual(
4507 [('end', 'end(root)')],
4508 list(events))
4509 self.assertFalse(list(events))
# parser.close() returns what Target.close() returned.
4510 self.assertEqual('close()', parser.close())
4511
# XMLPullParser with an etree.TreeBuilder target and 'start'/'end' events.
# NOTE(review): assert_event_tags() is defined outside this view; it is
# presumably comparing (event, element.tag) pairs - confirm.
4513 parser = self.etree.XMLPullParser(
4514 ['start', 'end'], target=etree.TreeBuilder())
4515 events = parser.read_events()
4516
4517 parser.feed('<root><element>')
4518 self.assert_event_tags(
4519 events, [('start', 'root'), ('start', 'element')])
# Already consumed events are not reported a second time.
4520 self.assertFalse(list(events))
4521 parser.feed('</element><child>')
4522 self.assert_event_tags(
4523 events, [('end', 'element'), ('start', 'child')])
4524 parser.feed('</child>')
4525 self.assert_event_tags(
4526 events, [('end', 'child')])
4527 parser.feed('</root>')
4528 self.assert_event_tags(
4529 events, [('end', 'root')])
4530 self.assertFalse(list(events))
# close() hands back the object built by the target; its tag must be
# that of the document root.
4531 root = parser.close()
4532 self.assertEqual('root', root.tag)
4533
# XMLPullParser with a TreeBuilder subclass that renames every element
# when it is closed: 'start' events must still show the original tag,
# 'end' events and the final root must show the renamed one.
# NOTE(review): assert_event_tags() is defined outside this view; it is
# presumably comparing (event, element.tag) pairs - confirm.
4535 class Target(etree.TreeBuilder):
4536 def end(self, tag):
# Let TreeBuilder close the element, then append a suffix to its tag.
4537 el = super(Target, self).end(tag)
4538 el.tag += '-huhu'
4539 return el
4540
4541 parser = self.etree.XMLPullParser(
4542 ['start', 'end'], target=Target())
4543 events = parser.read_events()
4544
# Elements that are only opened still carry their original tags.
4545 parser.feed('<root><element>')
4546 self.assert_event_tags(
4547 events, [('start', 'root'), ('start', 'element')])
4548 self.assertFalse(list(events))
# Closed elements show the suffixed tag; newly opened ones do not.
4549 parser.feed('</element><child>')
4550 self.assert_event_tags(
4551 events, [('end', 'element-huhu'), ('start', 'child')])
4552 parser.feed('</child>')
4553 self.assert_event_tags(
4554 events, [('end', 'child-huhu')])
4555 parser.feed('</root>')
4556 self.assert_event_tags(
4557 events, [('end', 'root-huhu')])
4558 self.assertFalse(list(events))
# The object returned by close() carries the renamed root tag as well.
4559 root = parser.close()
4560 self.assertEqual('root-huhu', root.tag)
4561
4593
4594
# Running this module directly does not run the tests; it only prints a
# hint to use test.py with this file instead.
4595 if __name__ == '__main__':
4596 print('to test use test.py %s' % __file__)
4597