source: sasview/src/sas/sascalc/dataloader/readers/xml_reader.py @ 527a190

Branches/tags: ESS_GUI, ESS_GUI_Docs, ESS_GUI_batch_fitting, ESS_GUI_bumps_abstraction, ESS_GUI_iss1116, ESS_GUI_iss879, ESS_GUI_iss959, ESS_GUI_opencl, ESS_GUI_ordering, ESS_GUI_sync_sascalc, magnetic_scatt, release-4.1.1, release-4.1.2, release-4.2.2, ticket-1009, ticket-1094-headless, ticket-1242-2d-resolution, ticket-1243, ticket-1249, ticket885, unittest-saveload
Last change on this file since 527a190 was 527a190, checked in by krzywon, 7 years ago

Loading XML files saved through no longer throwing errors. see #938

  • Property mode set to 100644
File size: 9.9 KB
Line 
1"""
2    Generic XML read and write utility
3
4    Usage: Either extend xml_reader or add as a class variable.
5"""
6############################################################################
7#This software was developed by the University of Tennessee as part of the
8#Distributed Data Analysis of Neutron Scattering Experiments (DANSE)
9#project funded by the US National Science Foundation.
10#If you use DANSE applications to do scientific research that leads to
11#publication, we ask that you acknowledge the use of the software with the
12#following sentence:
13#This work benefited from DANSE software developed under NSF award DMR-0520547.
14#copyright 2008,2009 University of Tennessee
15#############################################################################
16
17import logging
18from lxml import etree
19from lxml.builder import E
20
# Parser shared by every parse call in this module: comments are removed
# from parsed documents, processing instructions are kept.
PARSER = etree.ETCompatXMLParser(
    remove_comments=True,
    remove_pis=False,
)
22
class XMLreader():
    """
    Generic XML read and write class. Mostly helper functions.
    Makes reading/writing XML a bit easier than calling lxml libraries directly.

    :Dependencies:
        This class requires lxml 2.3 or higher.
    """

    # Source handed to set_xml_file() or set_xml_string()
    xml = None
    # Parsed document (set_xml_file) or the raw string (set_xml_string)
    xmldoc = None
    # Root element of the working document
    xmlroot = None
    # Source handed to set_schema()
    schema = None
    # Parsed schema document
    schemadoc = None
    # Encoding found by set_encoding(), or None
    encoding = None
    # Dictionary of processing instructions: target name -> data string
    processing_instructions = None

    def __init__(self, xml=None, schema=None):
        """
        Create a reader, optionally parsing an XML file and a schema file.

        :param xml: XML source passed on to set_xml_file, or None
        :param schema: Schema source passed on to set_schema, or None
        """
        self.xml = xml
        self.schema = schema
        self.processing_instructions = {}
        if xml is not None:
            self.set_xml_file(xml)
        else:
            self.xmldoc = None
            self.xmlroot = None
        if schema is not None:
            self.set_schema(schema)
        else:
            self.schemadoc = None

    def reader(self):
        """
        Validate the XML file, parse it into memory and return the parsed
        lxml document.

        :raises etree.XMLSchemaValidateError: when validate_xml() fails
        """
        if not self.validate_xml():
            # NOTE(review): the instance itself is passed as the first
            # argument and the error text as the second; kept as-is so
            # existing handlers see the same exception arguments.
            raise etree.XMLSchemaValidateError(self, self.find_invalid_xml())
        self.xmldoc = etree.parse(self.xml, parser=PARSER)
        return self.xmldoc

    def set_xml_file(self, xml):
        """
        Set the XML file and parse

        A syntax error is logged and any other error is swallowed; in both
        cases the parsed document and root are reset to None so that data
        from a previously loaded file is never left behind.

        :param xml: Source handed to etree.parse
        """
        try:
            self.xml = xml
            self.xmldoc = etree.parse(self.xml, parser=PARSER)
            self.xmlroot = self.xmldoc.getroot()
        except etree.XMLSyntaxError as xml_error:
            logging.info(xml_error)
            # Do not keep the tree of an earlier, successfully parsed file
            self.xmldoc = None
            self.xmlroot = None
        except Exception:
            self.xml = None
            self.xmldoc = None
            self.xmlroot = None

    def set_xml_string(self, tag_soup):
        """
        Set an XML string as the working XML.

        Note that xmldoc is set to the raw string itself; only xmlroot
        holds a parsed element.

        :param tag_soup: XML formatted string
        """
        try:
            self.xml = tag_soup
            self.xmldoc = tag_soup
            self.xmlroot = etree.fromstring(tag_soup)
        except etree.XMLSyntaxError as xml_error:
            logging.info(xml_error)
            # Do not keep the root of an earlier, successfully parsed string
            self.xmlroot = None
        except Exception:
            self.xml = None
            self.xmldoc = None
            self.xmlroot = None

    def set_schema(self, schema):
        """
        Set the schema file and parse

        :param schema: Source handed to etree.parse
        """
        try:
            self.schema = schema
            self.schemadoc = etree.parse(self.schema, parser=PARSER)
        except etree.XMLSyntaxError as xml_error:
            logging.info(xml_error)
            # Do not keep an earlier, successfully parsed schema
            self.schemadoc = None
        except Exception:
            self.schema = None
            self.schemadoc = None

    def validate_xml(self):
        """
        Checks to see if the XML file meets the schema

        :return: True when no schema is set, otherwise the result of the
            schema validation
        """
        if self.schema is None:
            return True
        # Re-parse both files so the check runs against their current state
        self.parse_schema_and_doc()
        schema_check = etree.XMLSchema(self.schemadoc)
        return schema_check.validate(self.xmldoc)

    def find_invalid_xml(self):
        """
        Finds the first offending element that should not be present in XML file

        :return: The validation error message, or an empty string when the
            document is valid or the error concerns an <'any'> element
        """
        self.parse_schema_and_doc()
        schema = etree.XMLSchema(self.schemadoc)
        try:
            schema.assertValid(self.xmldoc)
        except etree.DocumentInvalid as err:
            message = str(err)
            # Suppress errors for <'any'> elements
            if "##other" in message:
                return ""
            return message
        # assertValid returns nothing on success; report "no error"
        return ""

    def parse_schema_and_doc(self):
        """
        Re-parse the stored XML source and the stored schema source.
        """
        self.set_xml_file(self.xml)
        self.set_schema(self.schema)

    def to_string(self, elem, pretty_print=False, encoding=None):
        """
        Converts an etree element into a string

        :param elem: etree element to serialize
        :param pretty_print: Passed through to etree.tostring
        :param encoding: Passed through to etree.tostring
        :return: Whatever etree.tostring returns for these arguments
        """
        return etree.tostring(elem, pretty_print=pretty_print,
                              encoding=encoding)

    def break_processing_instructions(self, string, dic):
        """
        Method to break a processing instruction string apart and add to a dict

        A processing instruction without any data is stored with an empty
        string as its value.

        :param string: A processing instruction as a string
        :param dic: The dictionary to save the PIs to
        :return: The same dictionary, with the new entry added
        """
        pi_string = string.replace("<?", "").replace("?>", "").strip()
        # partition never fails: the data part is "" when there is no space
        pi_name, _, attr = pi_string.partition(" ")
        new_pi_name = self._create_unique_key(dic, pi_name)
        dic[new_pi_name] = attr
        return dic

    def set_processing_instructions(self):
        """
        Take out all processing instructions and create a dictionary from them
        If there is a default encoding, the value is also saved
        """
        dic = {}
        proc_instr = None
        if self.xmlroot is not None:
            proc_instr = self.xmlroot.getprevious()
        while proc_instr is not None:
            pi_string = self.to_string(proc_instr)
            # Serialized output may be a byte string; the text operations
            # below need a text string.
            if isinstance(pi_string, bytes) and not isinstance(pi_string, str):
                pi_string = pi_string.decode("utf-8")
            # One serialized chunk may hold several instructions
            for item in pi_string.split("?>\n<?"):
                dic = self.break_processing_instructions(item, dic)
            proc_instr = proc_instr.getprevious()
        if 'xml' in dic:
            self.set_encoding(dic['xml'])
            del dic['xml']
        self.processing_instructions = dic

    def set_encoding(self, attr_str):
        """
        Find the encoding in the xml declaration and save it as a string

        The value is stored without its surrounding quotes; self.encoding
        is set to None when no encoding attribute is present.

        :param attr_str: All attributes as a string
            e.g. "foo1="bar1" foo2="bar2" foo3="bar3" ... foo_n="bar_n""
        """
        attr_list = attr_str.replace(" = ", "=").split()
        for item in attr_list:
            name, separator, value = item.partition("=")
            if separator and name.lower() == "encoding":
                self.encoding = value.strip("\"'")
                return
        self.encoding = None

    def _create_unique_key(self, dictionary, name, numb=0):
        """
        Create a unique key value for any dictionary to prevent overwriting
        Tries successive numbered names until an unused one is found.

        The numbered name is built from the part of the name before its
        first underscore.

        :param dictionary: A dictionary with any number of entries
        :param name: The index of the item to be added to dictionary
        :param numb: The number to be appended to the name, starts at 0
        :return: A key that is not yet present in dictionary
        """
        # Membership test, so keys holding a None value also count as taken
        while name in dictionary:
            numb += 1
            name = "{0}_{1}".format(name.split("_")[0], numb)
        return name

    def create_tree(self, root):
        """
        Create an element tree for processing from an etree element

        :param root: etree Element(s)
        """
        return etree.ElementTree(root)

    def create_element_from_string(self, xml_string):
        """
        Create an element from an XML string

        :param xml_string: A string of xml
        """
        return etree.fromstring(xml_string)

    def create_element(self, name, attrib=None, nsmap=None):
        """
        Create an XML element for writing to file

        :param name: The name of the element to be created
        :param attrib: A dictionary of attributes; empty when omitted
        :param nsmap: Namespace map passed through to etree.Element
        """
        if attrib is None:
            attrib = {}
        return etree.Element(name, attrib, nsmap)

    def write_text(self, elem, text):
        """
        Write text to an etree Element

        :param elem: etree.Element object
        :param text: text to write to the element
        :return: The same element
        """
        elem.text = text
        return elem

    def write_attribute(self, elem, attr_name, attr_value):
        """
        Write attributes to an Element

        :param elem: etree.Element object
        :param attr_name: attribute name to write
        :param attr_value: attribute value to set
        """
        elem.attrib[attr_name] = attr_value

    def return_processing_instructions(self):
        """
        Get all processing instructions saved when loading the document

        :return: A list with one etree.ProcessingInstruction per saved entry
        """
        if self.processing_instructions is None:
            return []
        return [etree.ProcessingInstruction(key, value)
                for key, value in self.processing_instructions.items()]

    def append(self, element, tree):
        """
        Append an etree Element to a parent.

        :param element: etree Element to append
        :param tree: Object to append to; it must provide append()
        :return: The object that was appended to
        """
        # append() itself returns nothing, so hand back the parent explicitly
        tree.append(element)
        return tree

    def ebuilder(self, parent, elementname, text=None, attrib=None):
        """
        Use lxml E builder class with arbitrary inputs.

        :param parent: The parent element to append a child to
        :param elementname: The name of the child in string form
        :param text: The element text; no text is written when None
        :param attrib: A dictionary of attribute names to attribute values
        :return: The parent element
        """
        if attrib is None:
            attrib = {}
        if text is None:
            # Avoid writing the literal string "None" as element text
            elem = E(elementname, attrib)
        else:
            elem = E(elementname, attrib, str(text))
        parent.append(elem)
        return parent
Note: See TracBrowser for help on using the repository browser.