# File: xmlutils.py
# Library: DOPAL - DO Python Azureus Library
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details ( see the COPYING file ).
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
||
|
'''
|
||
|
XML utility functions.
|
||
|
'''
|
||
|
|
||
|
# Given an object which has the same interface as xml.dom.Node:
#   a) Join all adjacent text nodes together.
#   b) Strip all trailing and leading whitespace from each text node, removing
#      any text node which is left empty.
#
# This function will recursively process the tree structure given in the node
# object. No value will be returned by this function; instead the given object
# will be modified.
|
||
|
def normalise_xml_structure(xml_node):
    '''
    Tidy up the text nodes of a DOM tree in place.

    The node is first normalised (through its own C{normalize} method) so
    that adjacent text nodes are merged. Each direct text child then has its
    leading and trailing whitespace stripped; text children which end up
    empty are removed from the node and unlinked. Every child which is not a
    text node is processed recursively in the same way.

    @param xml_node: The node to process - any object offering the same
       interface as C{xml.dom.Node}.
    @return: C{None} - the given node is modified directly.
    '''
    from xml.dom import Node

    # Merge adjacent text nodes before inspecting any of them.
    xml_node.normalize()

    # Removal is deferred until the loop has finished, so that the child list
    # is not altered while it is being iterated over.
    blank_text_nodes = []

    for child in xml_node.childNodes:
        if child.nodeType != Node.TEXT_NODE:
            normalise_xml_structure(child)
            continue

        trimmed = child.nodeValue.strip()
        if not trimmed:
            # Whitespace is not content.
            blank_text_nodes.append(child)
            continue

        child.nodeValue = trimmed

    for blank in blank_text_nodes:
        xml_node.removeChild(blank)
        blank.unlink()
|
||
|
|
||
|
|
||
|
def get_text_content(node):
    '''
    Return the character data held by a DOM node.

    If the node is itself a text node, its value is returned. If it has no
    children at all, the empty string is returned. Otherwise the node must
    have exactly one text node amongst its direct children, and the value of
    that text node is returned (children of other types are ignored).

    @param node: The node to examine - any object offering the same
       interface as C{xml.dom.Node}.
    @return: The text value of the node.
    @raise ValueError: If the node has children, but the number of text
       nodes amongst them is not exactly one.
    '''
    from xml.dom import Node

    # Text content is stored directly in this node.
    if node.nodeType == Node.TEXT_NODE:
        return node.nodeValue

    children = node.childNodes

    # Sometimes happens for attributes with no real value.
    if len(children) == 0:
        return ''

    # Otherwise, the text must be in a child node.
    text_nodes = [child for child in children
                  if child.nodeType == Node.TEXT_NODE]
    if len(text_nodes) == 1:
        return text_nodes[0].nodeValue

    if text_nodes:
        err_text = "contained multiple text nodes"
    elif len(children) != 1:
        err_text = "contained multiple nodes, but none were text"
    else:
        err_text = "did not contain a character string as its value"

    # The call form of raise is valid on both Python 2 and Python 3, unlike
    # the "raise Class, value" form.
    raise ValueError(("the node %s " % node.nodeName) + err_text)
|
||
|
|
||
|
|
||
|
from xml.sax.saxutils import quoteattr, escape
|
||
|
|
||
|
# This base class will be removed when XMLObject is removed.
|
||
|
class _XMLObjectBase(object):
    '''
    A simple in-memory representation of one XML element.

    An instance holds a tag name, a mapping of attribute names to values and
    an ordered list of contents, and can serialise itself to text through
    L{to_string}.

    The contents must be either a single plain string, or any number of
    objects which provide a C{to_string(out, indent)} method (such as other
    instances of this class). L{to_string} does not support a plain string
    mixed with other contents.
    '''

    def __init__(self, tag_name):
        '''
        @param tag_name: The name of the element's tag.
        '''
        self.tag_name = tag_name
        self.attributes = {}
        self.contents = []

    def add_attribute(self, attribute_name, attribute_value):
        '''
        Set an attribute on the element, replacing any existing value stored
        under the same name.

        @param attribute_name: The attribute name (written out as given).
        @param attribute_value: The attribute value (quoted and escaped when
           written out).
        '''
        self.attributes[attribute_name] = attribute_value

    def add_content(self, content):
        '''
        Append a piece of content to the element.

        @param content: Either a string, or an object with a C{to_string}
           method.
        '''
        self.contents.append(content)

    def to_string(self, out=None, indent=0):
        '''
        Serialise this element, and everything it contains, as XML text.

        An element with no contents is written as a self-closing tag. An
        element whose only content is a plain string is written on one line,
        with the string escaped. Otherwise each piece of content is asked to
        write itself on its own line(s), indented two spaces further than
        this element.

        @param out: A file-like object to write to. If C{None}, the text is
           collected in an internal buffer and returned instead.
        @param indent: The number of spaces to indent this element by.
        @return: The XML text if C{out} was C{None}, otherwise C{None}.
        '''
        return_as_string = out is None
        if return_as_string:
            try:
                # We use StringIO instead of cStringIO not to lose unicode
                # strings.
                from StringIO import StringIO
            except ImportError:
                # The StringIO module does not exist on Python 3; io.StringIO
                # is the text buffer there.
                from io import StringIO
            out = StringIO()

        indent_string = ' ' * indent
        out.write(indent_string)
        out.write('<')
        out.write(self.tag_name)
        for attr_name, attr_value in self.attributes.items():
            out.write(' ')
            out.write(attr_name)
            out.write('=')
            out.write(quoteattr(attr_value))

        if not self.contents:
            # If we have no contents, we close the tag straight away.
            out.write(' />\n')
        else:
            out.write('>')

            # If we have one piece of content, which is just a string, then
            # we put it on the same line as the opening tag.
            if len(self.contents) == 1 and \
                    not hasattr(self.contents[0], 'to_string'):
                out.write(escape(self.contents[0]))

            # Otherwise, we assume we have some more XML blocks to write out,
            # so we indent them and put them on new lines.
            else:
                out.write('\n')
                for content in self.contents:
                    content.to_string(out, indent + 2)
                out.write(indent_string)

            out.write('</')
            out.write(self.tag_name)
            out.write('>\n')

        # If the invocation of this method was not passed a buffer to write
        # into, then we return the string representation.
        if return_as_string:
            return out.getvalue()

        return None
|
||
|
|
||
|
|
||
|
class XMLObject(_XMLObjectBase):
    '''
    B{Deprecated:} An object representing a block of XML.

    @attention: B{Deprecated:} This class does not provide any guarantees in
    the way that byte strings are handled. Use L{UXMLObject} instead.
    '''

    def __init__(self, tag_name):
        '''
        Create the block, issuing a C{DopalPendingDeprecationWarning}.

        @param tag_name: The name of the element's tag.
        '''
        from dopal.errors import DopalPendingDeprecationWarning

        import warnings

        # stacklevel=2 attributes the warning to the code which instantiated
        # this class, rather than to this line.
        warnings.warn("XMLObject is deprecated - use UXMLObject instead",
                      DopalPendingDeprecationWarning, stacklevel=2)

        _XMLObjectBase.__init__(self, tag_name)
|
||
|
|
||
|
|
||
|
class UXMLObject(_XMLObjectBase):
    '''
    An object representing a block of XML.

    Any string which is added to this block (either through the
    L{add_content} or L{add_attribute} methods) should be a unicode string,
    rather than a byte string. If it is a byte string, then it must be a
    string which contains text in the system's default encoding - attempting
    to add text encoded in other formats is not allowed.
    '''

    def to_string(self, out=None, indent=0):
        '''
        Serialise this block as XML text.

        @param out: A file-like object to write to, or C{None} to have the
           text returned.
        @param indent: The number of spaces to indent this block by.
        @return: The XML text as a unicode string if C{out} was C{None},
           otherwise C{None}.
        '''
        result = _XMLObjectBase.to_string(self, out, indent)
        if result is None:
            return None

        try:
            text_type = unicode
        except NameError:
            # Python 3 has no unicode builtin; str is the text type there.
            text_type = str
        return text_type(result)

    def encode(self, encoding='UTF-8'):
        '''
        Return this block as an encoded XML document.

        The result is an XML declaration naming the given encoding, followed
        by the text of this block, all encoded using that encoding.

        @param encoding: The name of the encoding to use.
        @return: The encoded document.
        '''
        declaration = '<?xml version="1.0" encoding="%s"?>\n' % encoding
        return (declaration + self.to_string()).encode(encoding)

    def __unicode__(self):
        '''
        Return the same text as L{to_string}.
        '''
        return self.to_string()
|
||
|
|
||
|
|
||
|
def make_xml_ref_for_az_object(object_id):
    '''
    Build the XML block which refers to a remote object in Azureus.

    The result is an C{OBJECT} block containing a single C{_object_id}
    block, whose content is the string form of the given object ID.

    @param object_id: The object ID to reference.
    @type object_id: int / long
    @return: A L{UXMLObject} instance.
    '''
    id_element = UXMLObject('_object_id')
    id_element.add_content(str(object_id))

    reference = UXMLObject('OBJECT')
    reference.add_content(id_element)

    return reference
|