source: sasview/src/sas/sascalc/dataloader/readers/xml_reader.py @ 25dd9c9

Last change on this file since 25dd9c9 was 235f514, checked in by andyfaff, 8 years ago

MAINT: replace '== None' by 'is None'

  • Property mode set to 100644
File size: 9.8 KB
Line 
1"""
2    Generic XML read and write utility
3
4    Usage: Either extend xml_reader or add as a class variable.
5"""
6############################################################################
7#This software was developed by the University of Tennessee as part of the
8#Distributed Data Analysis of Neutron Scattering Experiments (DANSE)
9#project funded by the US National Science Foundation.
10#If you use DANSE applications to do scientific research that leads to
11#publication, we ask that you acknowledge the use of the software with the
12#following sentence:
13#This work benefited from DANSE software developed under NSF award DMR-0520547.
14#copyright 2008,2009 University of Tennessee
15#############################################################################
16
17import logging
18from lxml import etree
19from lxml.builder import E
20
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Parser shared by every etree.parse() call in this module: XML comments are
# dropped while processing instructions are kept in the parsed tree.
PARSER = etree.ETCompatXMLParser(remove_comments=True, remove_pis=False)
24
class XMLreader():
    """
    Generic XML read and write class. Mostly helper functions.
    Makes reading/writing XML a bit easier than calling lxml libraries directly.

    An instance remembers the most recently loaded XML source (``xml``), its
    parsed form (``xmldoc``) and root element (``xmlroot``), an optional
    schema (``schema`` / ``schemadoc``), and the processing instructions and
    encoding collected by :meth:`set_processing_instructions`.

    :Dependencies:
        This class requires lxml 2.3 or higher.
    """

    # Class-level defaults so the attributes exist even before __init__ runs.
    xml = None
    xmldoc = None
    xmlroot = None
    schema = None
    schemadoc = None
    encoding = None
    processing_instructions = None

    def __init__(self, xml=None, schema=None):
        """
        Create a reader, optionally loading an XML file and a schema file.

        :param xml: XML source handed to :meth:`set_xml_file`, or None
        :param schema: schema source handed to :meth:`set_schema`, or None
        """
        self.xml = xml
        self.schema = schema
        self.processing_instructions = {}
        if xml is not None:
            self.set_xml_file(xml)
        else:
            self.xmldoc = None
            self.xmlroot = None
        if schema is not None:
            self.set_schema(schema)
        else:
            self.schemadoc = None

    def reader(self):
        """
        Validate the current XML source and parse it into memory.

        :return: the parsed document as returned by ``etree.parse``
        :raises etree.XMLSchemaValidateError: if a schema is set and the
            document does not satisfy it; the message is the text returned
            by :meth:`find_invalid_xml`
        """
        if not self.validate_xml():
            # The exception takes the message as its first argument; pass
            # only the validation report so the error is readable.
            message = self.find_invalid_xml()
            raise etree.XMLSchemaValidateError(
                message or "XML document failed schema validation")
        self.xmldoc = etree.parse(self.xml, parser=PARSER)
        return self.xmldoc

    def set_xml_file(self, xml):
        """
        Set the XML file and parse it.

        On a syntax error the error is logged, the source is remembered in
        ``self.xml`` and the parsed document/root are cleared so that no
        previously loaded document lingers.  On any other failure the source
        is cleared as well.  No exception is propagated.

        :param xml: XML source accepted by ``etree.parse``
        """
        self.xml = xml
        try:
            self.xmldoc = etree.parse(self.xml, parser=PARSER)
            self.xmlroot = self.xmldoc.getroot()
        except etree.XMLSyntaxError as xml_error:
            logger.info(xml_error)
            self.xmldoc = None
            self.xmlroot = None
        except Exception as error:
            # Best effort: an unreadable or missing source leaves the reader
            # empty instead of raising.
            logger.debug("Unable to load XML from %r: %s", xml, error)
            self.xml = None
            self.xmldoc = None
            self.xmlroot = None

    def set_xml_string(self, tag_soup):
        """
        Set an XML string as the working XML.

        ``self.xml`` and ``self.xmldoc`` are both set to the string itself;
        only ``self.xmlroot`` holds a parsed element.  On a syntax error the
        error is logged and the root is cleared; on any other failure all
        three attributes are cleared.  No exception is propagated.

        :param tag_soup: XML formatted string
        """
        self.xml = tag_soup
        self.xmldoc = tag_soup
        try:
            self.xmlroot = etree.fromstring(tag_soup)
        except etree.XMLSyntaxError as xml_error:
            logger.info(xml_error)
            # Do not keep the root of a previously loaded document.
            self.xmlroot = None
        except Exception as error:
            logger.debug("Unable to load XML string: %s", error)
            self.xml = None
            self.xmldoc = None
            self.xmlroot = None

    def set_schema(self, schema):
        """
        Set the schema file and parse it.

        On a syntax error the error is logged and the parsed schema is
        cleared; on any other failure the schema source is cleared as well.
        No exception is propagated.

        :param schema: schema source accepted by ``etree.parse``
        """
        self.schema = schema
        try:
            self.schemadoc = etree.parse(self.schema, parser=PARSER)
        except etree.XMLSyntaxError as xml_error:
            logger.info(xml_error)
            self.schemadoc = None
        except Exception as error:
            logger.debug("Unable to load schema from %r: %s", schema, error)
            self.schema = None
            self.schemadoc = None

    def validate_xml(self):
        """
        Check whether the XML file meets the schema.

        Both the XML source and the schema are re-parsed before validating.

        :return: the validation result, or True when no schema is set
        """
        if self.schema is None:
            return True
        self.parse_schema_and_doc()
        schema_check = etree.XMLSchema(self.schemadoc)
        return schema_check.validate(self.xmldoc)

    def find_invalid_xml(self):
        """
        Describe the first schema violation found in the XML file.

        Both the XML source and the schema are re-parsed before validating.

        :return: the validation error message, or an empty string when the
            document is valid
        """
        self.parse_schema_and_doc()
        schema = etree.XMLSchema(self.schemadoc)
        try:
            schema.assertValid(self.xmldoc)
        except etree.DocumentInvalid as err:
            return str(err)
        return ""

    def parse_schema_and_doc(self):
        """
        Re-parse the stored XML source and the stored schema source.
        """
        self.set_xml_file(self.xml)
        self.set_schema(self.schema)

    def to_string(self, elem, pretty_print=False, encoding=None):
        """
        Serialise an etree element with ``etree.tostring``.

        :param elem: the element to serialise
        :param pretty_print: passed through to ``etree.tostring``
        :param encoding: passed through to ``etree.tostring``
        :return: whatever ``etree.tostring`` returns for these arguments
        """
        return etree.tostring(elem, pretty_print=pretty_print,
                              encoding=encoding)

    def break_processing_instructions(self, string, dic):
        """
        Method to break a processing instruction string apart and add to a dict

        The ``<?`` and ``?>`` delimiters and surrounding whitespace are
        removed, then the text is split at the first run of whitespace into a
        name and the remaining data.  A processing instruction without data
        is stored with an empty string; an empty instruction is ignored.

        :param string: A processing instruction as a string
        :param dic: The dictionary to save the PIs to
        :return: ``dic``, updated in place
        """
        pi_string = string.replace("<?", "").replace("?>", "").strip()
        pieces = pi_string.split(None, 1)
        if not pieces:
            return dic
        pi_name = pieces[0]
        attr = pieces[1] if len(pieces) > 1 else ""
        # Repeated names get a numeric suffix instead of overwriting.
        dic[self._create_unique_key(dic, pi_name)] = attr
        return dic

    def set_processing_instructions(self):
        """
        Take out all processing instructions and create a dictionary from them
        If there is a default encoding, the value is also saved

        Walks backwards from the root element over its preceding siblings.
        The result replaces ``self.processing_instructions``; an ``xml``
        entry, if present, is handed to :meth:`set_encoding` and removed.
        With no root element loaded the dictionary is simply emptied.
        """
        dic = {}
        proc_instr = None
        if self.xmlroot is not None:
            proc_instr = self.xmlroot.getprevious()
        while proc_instr is not None:
            pi_string = self.to_string(proc_instr)
            if not isinstance(pi_string, str):
                # Serialised bytes must be decoded before the text handling
                # below can be applied.
                pi_string = pi_string.decode("utf-8")
            # One serialised chunk may hold several instructions.
            for item in pi_string.split("?>\n<?"):
                dic = self.break_processing_instructions(item, dic)
            proc_instr = proc_instr.getprevious()
        if 'xml' in dic:
            self.set_encoding(dic['xml'])
            del dic['xml']
        self.processing_instructions = dic

    def set_encoding(self, attr_str):
        """
        Find the encoding in the xml declaration and save it as a string

        The value is stored in ``self.encoding`` without its surrounding
        quotes; ``self.encoding`` is set to None when no encoding attribute
        is present.

        :param attr_str: All attributes as a string
            e.g. "foo1="bar1" foo2="bar2" foo3="bar3" ... foo_n="bar_n""
        """
        # Collapse ' = ' so that each attribute is one whitespace-free token.
        for item in attr_str.replace(" = ", "=").split():
            name, separator, value = item.partition("=")
            if separator and name.lower() == "encoding":
                self.encoding = value.strip("\"'")
                return
        self.encoding = None

    def _create_unique_key(self, dictionary, name, numb=0):
        """
        Create a unique key value for any dictionary to prevent overwriting

        While ``name`` maps to a value other than None, the counter is
        incremented and ``name`` becomes the text before its first underscore
        followed by ``_<counter>``.  Note that any text after the first
        underscore of the original name is therefore dropped.

        :param dictionary: A dictionary with any number of entries
        :param name: The index of the item to be added to dictionary
        :param numb: The number to be appended to the name, starts at 0
        :return: a key that is not mapped to a non-None value
        """
        while dictionary.get(name) is not None:
            numb += 1
            name = "{0}_{1}".format(name.split("_")[0], numb)
        return name

    def create_tree(self, root):
        """
        Create an element tree for processing from an etree element

        :param root: etree Element(s)
        :return: an ``etree.ElementTree`` wrapping ``root``
        """
        return etree.ElementTree(root)

    def create_element_from_string(self, xml_string):
        """
        Create an element from an XML string

        :param xml_string: A string of xml
        :return: the element returned by ``etree.fromstring``
        """
        return etree.fromstring(xml_string)

    def create_element(self, name, attrib=None, nsmap=None):
        """
        Create an XML element for writing to file

        :param name: The name of the element to be created
        :param attrib: dictionary of attributes; an empty one is used if None
        :param nsmap: namespace map passed through to ``etree.Element``
        :return: the new element
        """
        if attrib is None:
            attrib = {}
        return etree.Element(name, attrib, nsmap)

    def write_text(self, elem, text):
        """
        Write text to an etree Element

        :param elem: etree.Element object
        :param text: text to write to the element
        :return: ``elem``
        """
        elem.text = text
        return elem

    def write_attribute(self, elem, attr_name, attr_value):
        """
        Write attributes to an Element

        :param elem: etree.Element object
        :param attr_name: attribute name to write
        :param attr_value: attribute value to set
        """
        elem.attrib[attr_name] = attr_value

    def return_processing_instructions(self):
        """
        Get all processing instructions saved when loading the document

        :return: a list with one ``etree.ProcessingInstruction`` per entry of
            ``self.processing_instructions``; empty when nothing is stored
        """
        if self.processing_instructions is None:
            return []
        return [etree.ProcessingInstruction(key, value)
                for key, value in self.processing_instructions.items()]

    def append(self, element, tree):
        """
        Append an element to a parent and return the parent.

        :param element: etree Element to append
        :param tree: object to append to; its ``append`` method is used
        :return: ``tree``, after the element has been appended
        """
        # append() works in place and returns None, so hand back the parent
        # explicitly.
        tree.append(element)
        return tree

    def ebuilder(self, parent, elementname, text=None, attrib=None):
        """
        Use lxml E builder class with arbitrary inputs.

        :param parent: The parent element to append a child to
        :param elementname: The name of the child in string form
        :param text: The element text; converted with ``str``.  When None the
            child is created without text
        :param attrib: A dictionary of attribute names to attribute values
        :return: ``parent``, after the child has been appended
        """
        if attrib is None:
            attrib = {}
        if text is None:
            elem = E(elementname, attrib)
        else:
            elem = E(elementname, attrib, str(text))
        parent.append(elem)
        return parent
Note: See TracBrowser for help on using the repository browser.