Package CedarBackup2 :: Module xmlutil
[hide private]
[frames | no frames]

Source Code for Module CedarBackup2.xmlutil

  1  # -*- coding: iso-8859-1 -*- 
  2  # vim: set ft=python ts=3 sw=3 expandtab: 
  3  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  4  # 
  5  #              C E D A R 
  6  #          S O L U T I O N S       "Software done right." 
  7  #           S O F T W A R E 
  8  # 
  9  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
 10  # 
 11  # Copyright (c) 2004-2006,2010 Kenneth J. Pronovici. 
 12  # All rights reserved. 
 13  # 
 14  # Portions Copyright (c) 2000 Fourthought Inc, USA. 
 15  # All Rights Reserved. 
 16  # 
 17  # This program is free software; you can redistribute it and/or 
 18  # modify it under the terms of the GNU General Public License, 
 19  # Version 2, as published by the Free Software Foundation. 
 20  # 
 21  # This program is distributed in the hope that it will be useful, 
 22  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 23  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
 24  # 
 25  # Copies of the GNU General Public License are available from 
 26  # the Free Software Foundation website, http://www.gnu.org/. 
 27  # 
 28  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
 29  # 
 30  # Author   : Kenneth J. Pronovici <pronovic@ieee.org> 
 31  # Language : Python (>= 2.5) 
 32  # Project  : Cedar Backup, release 2 
 33  # Purpose  : Provides general XML-related functionality. 
 34  # 
 35  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
 36   
 37  ######################################################################## 
 38  # Module documentation 
 39  ######################################################################## 
 40   
 41  """ 
 42  Provides general XML-related functionality. 
 43   
 44  What I'm trying to do here is abstract much of the functionality that directly 
 45  accesses the DOM tree.  This is not so much to "protect" the other code from 
 46  the DOM, but to standardize the way it's used.  It will also help extension 
 47  authors easily write code that looks more like the rest of Cedar Backup. 
 48   
 49  @sort: createInputDom, createOutputDom, serializeDom, isElement, readChildren,  
 50         readFirstChild, readStringList, readString, readInteger, readBoolean, 
 51         addContainerNode, addStringNode, addIntegerNode, addBooleanNode, 
 52         TRUE_BOOLEAN_VALUES, FALSE_BOOLEAN_VALUES, VALID_BOOLEAN_VALUES 
 53   
 54  @var TRUE_BOOLEAN_VALUES: List of boolean values in XML representing C{True}. 
 55  @var FALSE_BOOLEAN_VALUES: List of boolean values in XML representing C{False}. 
 56  @var VALID_BOOLEAN_VALUES: List of valid boolean values in XML. 
 57   
 58  @author: Kenneth J. Pronovici <pronovic@ieee.org> 
 59  """ 
 60  # pylint: disable=C0111,C0103,W0511,W0104,W0106 
 61   
 62  ######################################################################## 
 63  # Imported modules 
 64  ######################################################################## 
 65   
 66  # System modules 
 67  import sys 
 68  import re 
 69  import logging 
 70  import codecs 
 71  from types import UnicodeType 
 72  from StringIO import StringIO 
 73   
 74  # XML-related modules 
 75  from xml.parsers.expat import ExpatError 
 76  from xml.dom.minidom import Node 
 77  from xml.dom.minidom import getDOMImplementation 
 78  from xml.dom.minidom import parseString 
 79   
 80   
 81  ######################################################################## 
 82  # Module-wide constants and variables 
 83  ######################################################################## 
 84   
 85  logger = logging.getLogger("CedarBackup2.log.xml") 
 86   
 87  TRUE_BOOLEAN_VALUES   = [ "Y", "y", ] 
 88  FALSE_BOOLEAN_VALUES  = [ "N", "n", ] 
 89  VALID_BOOLEAN_VALUES  = TRUE_BOOLEAN_VALUES + FALSE_BOOLEAN_VALUES 
 90   
 91   
 92  ######################################################################## 
 93  # Functions for creating and parsing DOM trees 
 94  ######################################################################## 
 95   
def createInputDom(xmlData, name="cb_config"):
   """
   Parses an XML string into a DOM tree and locates its root node.

   @param xmlData: XML document to parse, as a string.
   @param name: Assumed base name of the document (root node name).

   @return: Tuple (xmlDom, parentNode) for the parsed document, where
            C{parentNode} is whatever L{readFirstChild} finds for C{name}
            directly beneath the document (possibly C{None}).
   @raise ValueError: If the document can't be parsed.
   """
   try:
      document = parseString(xmlData)
      return (document, readFirstChild(document, name))
   except (IOError, ExpatError):
      # The active exception is fetched from sys.exc_info() rather than being
      # bound in the except clause; the resulting message is the same.
      raise ValueError("Unable to parse XML document: %s" % sys.exc_info()[1])
109
def createOutputDom(name="cb_config"):
   """
   Builds a new, empty DOM tree to be used for writing an XML document.

   @param name: Base name of the document (root node name).

   @return: Tuple (xmlDom, parentNode) for the new document, where
            C{parentNode} is the document's root element.
   """
   document = getDOMImplementation().createDocument(None, name, None)
   rootNode = document.documentElement
   return (document, rootNode)
119 120 121 ######################################################################## 122 # Functions for reading values out of XML documents 123 ######################################################################## 124
def isElement(node):
   """
   Indicates whether an XML node is an element node.

   @param node: DOM node to check.

   @return: C{True} if the node's type is C{Node.ELEMENT_NODE}, C{False} otherwise.
   """
   nodeType = node.nodeType
   return nodeType == Node.ELEMENT_NODE
130
def readChildren(parent, name):
   """
   Returns a list of nodes with a given name immediately beneath the parent.

   "Immediately beneath" means direct children of the passed-in parent
   node.  The lookup is done with C{getElementsByTagName}, which returns
   matching descendants at any level, so the result is filtered down to
   the nodes whose C{parentNode} is the passed-in parent.

   @param parent: Parent node to search beneath (may be C{None}).
   @param name: Name of nodes to search for.

   @return: List of child nodes with correct parent, or an empty list if
            the parent is C{None} or no matching nodes are found.
   """
   if parent is None:
      return []
   return [ node for node in parent.getElementsByTagName(name) if node.parentNode is parent ]
158
def readFirstChild(parent, name):
   """
   Returns the first child with a given name immediately beneath the parent.

   "Immediately beneath" means direct children of the passed-in parent
   node, as found by L{readChildren}.

   @param parent: Parent node to search beneath.
   @param name: Name of node to search for.

   @return: First properly-named child of parent, or C{None} if no matching nodes are found.
   """
   matches = readChildren(parent, name)
   if not matches:
      return None
   return matches[0]
175
def readStringList(parent, name):
   """
   Returns a list of the string contents associated with nodes with a given
   name immediately beneath the parent.

   "Immediately beneath" means direct children of the passed-in parent
   node, as found by L{readChildren}.  The string contents of a matching
   node are taken from its first C{TEXT_NODE} child.  Matching nodes with
   no C{TEXT_NODE} child contribute nothing to the returned list.

   @param parent: Parent node to search beneath.
   @param name: Name of node to search for.

   @return: List of strings as described above, or C{None} if no strings were found.
   """
   values = []
   for node in readChildren(parent, name):
      # Iterating over an empty child list is a no-op, so childless nodes are skipped
      for child in node.childNodes:
         if child.nodeType == Node.TEXT_NODE:
            values.append(child.nodeValue)
            break  # only the first text node counts
   return values or None
207
def readString(parent, name):
   """
   Returns string contents of the first child with a given name immediately
   beneath the parent.

   The lookup is delegated to L{readStringList}, and the first entry of
   its result is returned.

   @param parent: Parent node to search beneath.
   @param name: Name of node to search for.

   @return: String contents of node or C{None} if no matching nodes are found.
   """
   values = readStringList(parent, name)
   if values is not None:
      return values[0]
   return None
227
def readInteger(parent, name):
   """
   Returns integer contents of the first child with a given name immediately
   beneath the parent.

   The string is located with L{readString} and converted with C{int()}.

   @param parent: Parent node to search beneath.
   @param name: Name of node to search for.

   @return: Integer contents of node or C{None} if no matching nodes are found.
   @raise ValueError: If the string at the location can't be converted to an integer.
   """
   text = readString(parent, name)
   if text is not None:
      return int(text)
   return None
247
def readLong(parent, name):
   """
   Returns long integer contents of the first child with a given name
   immediately beneath the parent.

   The string is located with L{readString} and converted with C{long()}.

   @param parent: Parent node to search beneath.
   @param name: Name of node to search for.

   @return: Long integer contents of node or C{None} if no matching nodes are found.
   @raise ValueError: If the string at the location can't be converted to an integer.
   """
   text = readString(parent, name)
   if text is not None:
      return long(text)
   return None
267
def readFloat(parent, name):
   """
   Returns float contents of the first child with a given name immediately
   beneath the parent.

   The string is located with L{readString} and converted with C{float()}.

   @param parent: Parent node to search beneath.
   @param name: Name of node to search for.

   @return: Float contents of node or C{None} if no matching nodes are found.
   @raise ValueError: If the string at the location can't be converted to a
                      float value.
   """
   text = readString(parent, name)
   if text is not None:
      return float(text)
   return None
288
def readBoolean(parent, name):
   """
   Returns boolean contents of the first child with a given name immediately
   beneath the parent.

   The string is located with L{readString}.  Its value must be one of the
   values in L{VALID_BOOLEAN_VALUES}.

   @param parent: Parent node to search beneath.
   @param name: Name of node to search for.

   @return: Boolean contents of node or C{None} if no matching nodes are found.
   @raise ValueError: If the string at the location can't be converted to a boolean.
   """
   text = readString(parent, name)
   if text is None:
      return None
   if text in TRUE_BOOLEAN_VALUES:
      return True
   if text in FALSE_BOOLEAN_VALUES:
      return False
   raise ValueError("Boolean values must be one of %s." % VALID_BOOLEAN_VALUES)
315 316 317 ######################################################################## 318 # Functions for writing values into XML documents 319 ######################################################################## 320
def addContainerNode(xmlDom, parentNode, nodeName):
   """
   Creates a new element and appends it as the last child of a parent node.

   @param xmlDom: DOM tree as from C{impl.createDocument()}.
   @param parentNode: Parent node to create child for.
   @param nodeName: Name of the new container node.

   @return: Reference to the newly-created node.
   """
   newNode = xmlDom.createElement(nodeName)
   parentNode.appendChild(newNode)
   return newNode
334
def addStringNode(xmlDom, parentNode, nodeName, nodeValue):
   """
   Adds a text node as the next child of a parent, to contain a string.

   The element itself is created via L{addContainerNode}.  If the
   C{nodeValue} is None, then the node will be created, but will be empty
   (i.e. will contain no text node child).

   @param xmlDom: DOM tree as from C{impl.createDocument()}.
   @param parentNode: Parent node to create child for.
   @param nodeName: Name of the new container node.
   @param nodeValue: The value to put into the node.

   @return: Reference to the newly-created node.
   """
   newNode = addContainerNode(xmlDom, parentNode, nodeName)
   if nodeValue is None:
      return newNode
   newNode.appendChild(xmlDom.createTextNode(nodeValue))
   return newNode
354
def addIntegerNode(xmlDom, parentNode, nodeName, nodeValue):
   """
   Adds a text node as the next child of a parent, to contain an integer.

   If the C{nodeValue} is None, then the node will be created, but will be
   empty (i.e. will contain no text node child).

   The integer will be converted to a string using "%d".  The result will be
   added to the document via L{addStringNode}.

   @param xmlDom: DOM tree as from C{impl.createDocument()}.
   @param parentNode: Parent node to create child for.
   @param nodeName: Name of the new container node.
   @param nodeValue: The value to put into the node.

   @return: Reference to the newly-created node.
   """
   text = None
   if nodeValue is not None:
      text = "%d" % nodeValue  # %d works for both int and long
   return addStringNode(xmlDom, parentNode, nodeName, text)
376
def addLongNode(xmlDom, parentNode, nodeName, nodeValue):
   """
   Adds a text node as the next child of a parent, to contain a long integer.

   If the C{nodeValue} is None, then the node will be created, but will be
   empty (i.e. will contain no text node child).

   The integer will be converted to a string using "%d".  The result will be
   added to the document via L{addStringNode}.

   @param xmlDom: DOM tree as from C{impl.createDocument()}.
   @param parentNode: Parent node to create child for.
   @param nodeName: Name of the new container node.
   @param nodeValue: The value to put into the node.

   @return: Reference to the newly-created node.
   """
   text = None
   if nodeValue is not None:
      text = "%d" % nodeValue  # %d works for both int and long
   return addStringNode(xmlDom, parentNode, nodeName, text)
398
def addBooleanNode(xmlDom, parentNode, nodeName, nodeValue):
   """
   Adds a text node as the next child of a parent, to contain a boolean.

   If the C{nodeValue} is None, then the node will be created, but will be
   empty (i.e. will contain no text node child).

   Boolean C{True}, or anything else interpreted as C{True} by Python, will
   be converted to a string "Y".  Anything else will be converted to a
   string "N".  The result is added to the document via L{addStringNode}.

   @param xmlDom: DOM tree as from C{impl.createDocument()}.
   @param parentNode: Parent node to create child for.
   @param nodeName: Name of the new container node.
   @param nodeValue: The value to put into the node.

   @return: Reference to the newly-created node.
   """
   if nodeValue is None:
      text = None
   elif nodeValue:
      text = "Y"
   else:
      text = "N"
   return addStringNode(xmlDom, parentNode, nodeName, text)
424 425 426 ######################################################################## 427 # Functions for serializing DOM trees 428 ######################################################################## 429
def serializeDom(xmlDom, indent=3):
   """
   Serializes a DOM tree and returns the result in a string.

   The document is written into an in-memory buffer by a L{Serializer}
   configured for "UTF-8" output, and the buffer contents are returned.

   @param xmlDom: XML DOM tree to serialize
   @param indent: Number of spaces to indent, as an integer
   @return: String form of DOM tree, pretty-printed.
   """
   outputBuffer = StringIO()
   Serializer(outputBuffer, "UTF-8", indent=indent).serialize(xmlDom)
   serialized = outputBuffer.getvalue()
   outputBuffer.close()
   return serialized
443
class Serializer(object):

   """
   XML serializer class.

   This is a customized serializer that I hacked together based on what I found
   in the PyXML distribution.  Basically, around release 2.7.0, the only reason
   I still had around a dependency on PyXML was for the PrettyPrint
   functionality, and that seemed pointless.  So, I stripped the PrettyPrint
   code out of PyXML and hacked bits of it off until it did just what I needed
   and no more.

   This code started out being called PrintVisitor, but I decided it makes more
   sense just calling it a serializer.  I've made nearly all of the methods
   private, and I've added a new high-level serialize() method rather than
   having clients call C{visit()}.

   Anyway, as a consequence of my hacking with it, this can't quite be called a
   complete XML serializer any more.  I ripped out support for HTML and XHTML,
   and there is also no longer any support for namespaces (which I took out
   because this dragged along a lot of extra code, and Cedar Backup doesn't use
   namespaces).  However, everything else should pretty much work as expected.

   When an indent is configured, text nodes that contain only whitespace are
   not written, so they cannot disturb the pretty-printed layout.

   @copyright: This code, prior to customization, was part of the PyXML
   codebase, and before that was part of the 4DOM suite developed by
   Fourthought, Inc.  In its original form, it was Copyright (c) 2000
   Fourthought Inc, USA; All Rights Reserved.
   """

   # Maps each DOM node type to the name of the method that serializes it.
   # Methods are looked up by name at call time, so overrides in subclasses
   # are honored.
   _VISITORS = {
      Node.ELEMENT_NODE: "_visitElement",
      Node.ATTRIBUTE_NODE: "_visitAttr",
      Node.TEXT_NODE: "_visitText",
      Node.CDATA_SECTION_NODE: "_visitCDATASection",
      Node.ENTITY_REFERENCE_NODE: "_visitEntityReference",
      Node.ENTITY_NODE: "_visitEntity",
      Node.PROCESSING_INSTRUCTION_NODE: "_visitProcessingInstruction",
      Node.COMMENT_NODE: "_visitComment",
      Node.DOCUMENT_NODE: "_visitDocument",
      Node.DOCUMENT_TYPE_NODE: "_visitDocumentType",
      Node.DOCUMENT_FRAGMENT_NODE: "_visitDocumentFragment",
      Node.NOTATION_NODE: "_visitNotation",
   }

   def __init__(self, stream=sys.stdout, encoding="UTF-8", indent=3):
      """
      Initialize a serializer.
      @param stream: Stream to write output to.
      @param encoding: Output encoding.
      @param indent: Number of spaces to indent, as an integer
      """
      self.stream = stream
      self.encoding = encoding
      self._indent = indent * " "   # indent string for one nesting level
      self._depth = 0               # current element nesting depth
      self._inText = 0              # non-zero right after character data was written

   def serialize(self, xmlDom):
      """
      Serialize the passed-in XML document.
      @param xmlDom: XML DOM tree to serialize
      @raise ValueError: If there's an unknown node type in the document.
      """
      self._visit(xmlDom)
      self.stream.write("\n")

   def _write(self, text):
      """Encodes text into the output encoding and writes it to the stream."""
      self.stream.write(_encodeText(text, self.encoding))

   def _tryIndent(self):
      """Starts a new, indented line unless indenting is off or text was just written."""
      if not self._inText and self._indent:
         self._write('\n' + self._indent*self._depth)

   def _visit(self, node):
      """
      Dispatches a node to the method that serializes its node type.
      @param node: DOM node to serialize.
      @raise ValueError: If there's an unknown node type in the document.
      """
      visitor = self._VISITORS.get(node.nodeType)
      if visitor is None:
         # It has a node type, but we don't know how to handle it
         raise ValueError("Unknown node type: %s" % repr(node))
      return getattr(self, visitor)(node)

   def _visitNodeList(self, node, exclude=None):
      """Serializes every node in a node list, skipping the C{exclude} node."""
      for curr in node:
         if curr is not exclude:
            self._visit(curr)

   def _visitNamedNodeMap(self, node):
      """Serializes every value held in a named node map."""
      for item in node.values():
         self._visit(item)

   def _visitAttr(self, node):
      """Writes an attribute as C{ name=<quote>value<quote>}."""
      self._write(' ' + node.name)
      value = node.value
      text = _translateCDATA(value, self.encoding)
      text, delimiter = _translateCDATAAttr(text)
      # The value was already encoded by _translateCDATA, so it goes straight to the stream
      self.stream.write("=%s%s%s" % (delimiter, text, delimiter))

   def _visitProlog(self):
      """Writes the XML declaration."""
      self._write("<?xml version='1.0' encoding='%s'?>" % (self.encoding or 'utf-8'))
      self._inText = 0

   def _visitDocument(self, node):
      """Writes the prolog, then the doctype (if any), then the other children."""
      self._visitProlog()
      if node.doctype:
         self._visitDocumentType(node.doctype)
      # The doctype was handled above, so it is excluded from the child walk
      self._visitNodeList(node.childNodes, exclude=node.doctype)

   def _visitDocumentFragment(self, node):
      """Serializes the children of a document fragment."""
      self._visitNodeList(node.childNodes)

   def _visitElement(self, node):
      """Writes an element, its attributes and (recursively) its children."""
      self._tryIndent()
      self._write('<%s' % node.tagName)
      for attr in node.attributes.values():
         self._visitAttr(attr)
      if len(node.childNodes):
         self._write('>')
         self._depth = self._depth + 1
         self._visitNodeList(node.childNodes)
         self._depth = self._depth - 1
         if not self._inText:
            self._tryIndent()
         self._write('</%s>' % node.tagName)
      else:
         self._write('/>')
      self._inText = 0

   def _visitText(self, node):
      """Writes escaped character data, skipping whitespace-only text when indenting."""
      text = node.data
      if self._indent and not text.strip():
         # Whitespace-only text is layout rather than content when
         # pretty-printing.  Writing it would set _inText and suppress the
         # indentation of the tags that follow.  (Previously the result of
         # strip() was discarded, so this filter never took effect.)
         return
      if text:
         text = _translateCDATA(text, self.encoding)
         self.stream.write(text)
         self._inText = 1

   def _visitDocumentType(self, doctype):
      """Writes a DOCTYPE declaration, if it has a system or public identifier."""
      if not doctype.systemId and not doctype.publicId:
         return
      self._tryIndent()
      self._write('<!DOCTYPE %s' % doctype.name)
      # Quote each identifier with whichever quote character it does not contain
      if doctype.systemId and '"' in doctype.systemId:
         system = "'%s'" % doctype.systemId
      else:
         system = '"%s"' % doctype.systemId
      if doctype.publicId and '"' in doctype.publicId:
         # We should probably throw an error
         # Valid characters:  <space> | <newline> | <linefeed> |
         #                    [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
         public = "'%s'" % doctype.publicId
      else:
         public = '"%s"' % doctype.publicId
      if doctype.publicId and doctype.systemId:
         self._write(' PUBLIC %s %s' % (public, system))
      elif doctype.systemId:
         self._write(' SYSTEM %s' % system)
      if doctype.entities or doctype.notations:
         self._write(' [')
         self._depth = self._depth + 1
         self._visitNamedNodeMap(doctype.entities)
         self._visitNamedNodeMap(doctype.notations)
         self._depth = self._depth - 1
         self._tryIndent()
         self._write(']>')
      else:
         self._write('>')
      self._inText = 0

   def _visitEntity(self, node):
      """Visited from a NamedNodeMap in DocumentType"""
      self._tryIndent()
      self._write('<!ENTITY %s' % (node.nodeName))
      if node.publicId:
         self._write(' PUBLIC %s' % node.publicId)
      if node.systemId:
         self._write(' SYSTEM %s' % node.systemId)
      if node.notationName:
         self._write(' NDATA %s' % node.notationName)
      self._write('>')

   def _visitNotation(self, node):
      """Visited from a NamedNodeMap in DocumentType"""
      self._tryIndent()
      self._write('<!NOTATION %s' % node.nodeName)
      if node.publicId:
         self._write(' PUBLIC %s' % node.publicId)
      if node.systemId:
         self._write(' SYSTEM %s' % node.systemId)
      self._write('>')

   def _visitCDATASection(self, node):
      """Writes a CDATA section with its data unescaped."""
      self._tryIndent()
      self._write('<![CDATA[%s]]>' % (node.data))
      self._inText = 0

   def _visitComment(self, node):
      """Writes a comment."""
      self._tryIndent()
      self._write('<!--%s-->' % (node.data))
      self._inText = 0

   def _visitEntityReference(self, node):
      """Writes an entity reference; it counts as text for indentation purposes."""
      self._write('&%s;' % node.nodeName)
      self._inText = 1

   def _visitProcessingInstruction(self, node):
      """Writes a processing instruction."""
      self._tryIndent()
      self._write('<?%s %s?>' % (node.target, node.data))
      self._inText = 0
681
682 -def _encodeText(text, encoding):
683 """ 684 @copyright: This code, prior to customization, was part of the PyXML 685 codebase, and before that was part of the 4DOM suite developed by 686 Fourthought, Inc. It its original form, it was attributed to Martin v. 687 Löwis and was Copyright (c) 2000 Fourthought Inc, USA; All Rights Reserved. 688 """ 689 encoder = codecs.lookup(encoding)[0] # encode,decode,reader,writer 690 if type(text) is not UnicodeType: 691 text = unicode(text, "utf-8") 692 return encoder(text)[0] # result,size
693
694 -def _translateCDATAAttr(characters):
695 """ 696 Handles normalization and some intelligence about quoting. 697 698 @copyright: This code, prior to customization, was part of the PyXML 699 codebase, and before that was part of the 4DOM suite developed by 700 Fourthought, Inc. It its original form, it was Copyright (c) 2000 701 Fourthought Inc, USA; All Rights Reserved. 702 """ 703 if not characters: 704 return '', "'" 705 if "'" in characters: 706 delimiter = '"' 707 new_chars = re.sub('"', '&quot;', characters) 708 else: 709 delimiter = "'" 710 new_chars = re.sub("'", '&apos;', characters) 711 #FIXME: There's more to normalization 712 #Convert attribute new-lines to character entity 713 # characters is possibly shorter than new_chars (no entities) 714 if "\n" in characters: 715 new_chars = re.sub('\n', '&#10;', new_chars) 716 return new_chars, delimiter
717 718 #Note: Unicode object only for now
719 -def _translateCDATA(characters, encoding='UTF-8', prev_chars='', markupSafe=0):
720 """ 721 @copyright: This code, prior to customization, was part of the PyXML 722 codebase, and before that was part of the 4DOM suite developed by 723 Fourthought, Inc. It its original form, it was Copyright (c) 2000 724 Fourthought Inc, USA; All Rights Reserved. 725 """ 726 CDATA_CHAR_PATTERN = re.compile('[&<]|]]>') 727 CHAR_TO_ENTITY = { '&': '&amp;', '<': '&lt;', ']]>': ']]&gt;', } 728 ILLEGAL_LOW_CHARS = '[\x01-\x08\x0B-\x0C\x0E-\x1F]' 729 ILLEGAL_HIGH_CHARS = '\xEF\xBF[\xBE\xBF]' 730 XML_ILLEGAL_CHAR_PATTERN = re.compile('%s|%s'%(ILLEGAL_LOW_CHARS, ILLEGAL_HIGH_CHARS)) 731 if not characters: 732 return '' 733 if not markupSafe: 734 if CDATA_CHAR_PATTERN.search(characters): 735 new_string = CDATA_CHAR_PATTERN.subn(lambda m, d=CHAR_TO_ENTITY: d[m.group()], characters)[0] 736 else: 737 new_string = characters 738 if prev_chars[-2:] == ']]' and characters[0] == '>': 739 new_string = '&gt;' + new_string[1:] 740 else: 741 new_string = characters 742 #Note: use decimal char entity rep because some browsers are broken 743 #FIXME: This will bomb for high characters. Should, for instance, detect 744 #The UTF-8 for 0xFFFE and put out &#xFFFE; 745 if XML_ILLEGAL_CHAR_PATTERN.search(new_string): 746 new_string = XML_ILLEGAL_CHAR_PATTERN.subn(lambda m: '&#%i;' % ord(m.group()), new_string)[0] 747 new_string = _encodeText(new_string, encoding) 748 return new_string
749