source: sasview/src/sas/sascalc/dataloader/readers/xml_reader.py @ ff2b961

Last change on this file since ff2b961 was 7b50f14, checked in by Paul Kienzle <pkienzle@…>, 7 years ago

restore python 2 functionality

  • Property mode set to 100644
File size: 10.0 KB
Line 
1"""
2    Generic XML read and write utility
3
4    Usage: Either extend xml_reader or add as a class variable.
5"""
6############################################################################
7#This software was developed by the University of Tennessee as part of the
8#Distributed Data Analysis of Neutron Scattering Experiments (DANSE)
9#project funded by the US National Science Foundation.
10#If you use DANSE applications to do scientific research that leads to
11#publication, we ask that you acknowledge the use of the software with the
12#following sentence:
13#This work benefited from DANSE software developed under NSF award DMR-0520547.
14#copyright 2008,2009 University of Tennessee
15#############################################################################
16
17import logging
18
19from lxml import etree
20from lxml.builder import E
21
22from ..file_reader_base_class import FileReader, decode
23
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Shared lxml parser used for every file parse in this module: XML comments
# are dropped (remove_comments=True) while processing instructions are kept
# (remove_pis=False).
PARSER = etree.ETCompatXMLParser(remove_comments=True, remove_pis=False)
27
class XMLreader(FileReader):
    """
    Generic XML read and write class. Mostly helper functions.
    Makes reading/writing XML a bit easier than calling lxml libraries directly.

    State kept on the instance:

    * ``xml`` -- the XML source handed to :meth:`set_xml_file` or
      :meth:`set_xml_string`
    * ``xmldoc`` / ``xmlroot`` -- the parsed document and its root element
    * ``schema`` / ``schemadoc`` -- the schema source and its parsed document
    * ``encoding`` -- encoding found by :meth:`set_encoding`, or None
    * ``processing_instructions`` -- dict filled by
      :meth:`set_processing_instructions`

    :Dependencies:
        This class requires lxml 2.3 or higher.
    """

    xml = None
    xmldoc = None
    xmlroot = None
    schema = None
    schemadoc = None
    encoding = None
    processing_instructions = None

    def __init__(self, xml=None, schema=None):
        """
        Store the XML and schema sources and parse whichever ones are given.

        :param xml: XML source passed on to :meth:`set_xml_file`, or None
        :param schema: schema source passed on to :meth:`set_schema`, or None
        :raises etree.XMLSyntaxError: if ``xml`` is given and is malformed
        """
        self.xml = xml
        self.schema = schema
        self.processing_instructions = {}
        if xml is not None:
            self.set_xml_file(xml)
        else:
            self.xmldoc = None
            self.xmlroot = None
        if schema is not None:
            self.set_schema(schema)
        else:
            self.schemadoc = None

    def reader(self):
        """
        Validate the XML file against the schema (when one is set), parse it
        into memory and return the resulting lxml ElementTree.

        :return: the parsed document, also stored in ``self.xmldoc``
        :raises etree.XMLSchemaValidateError: if validation fails; the
            message is the text returned by :meth:`find_invalid_xml`
        """
        if not self.validate_xml():
            # The exception takes the message as its first argument; an
            # optional second argument must be an lxml error log, not a str.
            raise etree.XMLSchemaValidateError(self.find_invalid_xml())
        self.xmldoc = etree.parse(self.xml, parser=PARSER)
        return self.xmldoc

    def set_xml_file(self, xml):
        """
        Set the XML file and parse it.

        Malformed XML is logged and re-raised. Any other failure is treated
        as "no document": ``xml``, ``xmldoc`` and ``xmlroot`` are reset to
        None and no exception propagates.

        :param xml: XML source handed to ``etree.parse``
        :raises etree.XMLSyntaxError: if the source is not well-formed XML
        """
        try:
            self.xml = xml
            self.xmldoc = etree.parse(self.xml, parser=PARSER)
            self.xmlroot = self.xmldoc.getroot()
        except etree.XMLSyntaxError as xml_error:
            logger.info(xml_error)
            raise
        except Exception as exc:
            # Best effort by design: leave the reader empty rather than fail.
            logger.debug("Unable to parse XML source %r: %s", xml, exc)
            self.xml = None
            self.xmldoc = None
            self.xmlroot = None

    def set_xml_string(self, tag_soup):
        """
        Set an XML string as the working XML.

        Note that ``xmldoc`` is set to the raw string itself; only
        ``xmlroot`` holds a parsed element.

        :param tag_soup: XML formatted string
        :raises etree.XMLSyntaxError: if the string is not well-formed XML
        :raises Exception: any other parsing failure is re-raised after the
            stored state has been reset to None
        """
        try:
            self.xml = tag_soup
            self.xmldoc = tag_soup
            self.xmlroot = etree.fromstring(tag_soup)
        except etree.XMLSyntaxError as xml_error:
            logger.info(xml_error)
            raise
        except Exception:
            self.xml = None
            self.xmldoc = None
            self.xmlroot = None
            raise

    def set_schema(self, schema):
        """
        Set the schema file and parse it.

        A malformed schema is only logged (``schemadoc`` keeps its previous
        value). Any other failure resets ``schema`` and ``schemadoc`` to
        None. No exception propagates in either case.

        :param schema: schema source handed to ``etree.parse``
        """
        try:
            self.schema = schema
            self.schemadoc = etree.parse(self.schema, parser=PARSER)
        except etree.XMLSyntaxError as xml_error:
            logger.info(xml_error)
        except Exception as exc:
            # Best effort by design: fall back to "no schema".
            logger.debug("Unable to parse schema %r: %s", schema, exc)
            self.schema = None
            self.schemadoc = None

    def validate_xml(self):
        """
        Check whether the XML file meets the schema.

        Both documents are re-parsed first. When no schema is set the
        document is considered valid.

        :return: True if there is no schema or the document validates
        """
        valid = True
        if self.schema is not None:
            self.parse_schema_and_doc()
            schema_check = etree.XMLSchema(self.schemadoc)
            valid = schema_check.validate(self.xmldoc)
        return valid

    def find_invalid_xml(self):
        """
        Find the first offending element that should not be present in the
        XML file.

        :return: the validation error message, or an empty string when the
            document is valid or the only complaint concerns ``##other``
            (``<any>``) elements
        """
        self.parse_schema_and_doc()
        schema = etree.XMLSchema(self.schemadoc)
        try:
            # assertValid() returns nothing on success, so its result must
            # not be used as the error text.
            schema.assertValid(self.xmldoc)
        except etree.DocumentInvalid as err:
            message = str(err)
            # Suppress errors for <'any'> elements
            if "##other" in message:
                return ""
            return message
        return ""

    def parse_schema_and_doc(self):
        """
        Re-parse the stored XML and schema sources, refreshing ``xmldoc``,
        ``xmlroot`` and ``schemadoc``.
        """
        self.set_xml_file(self.xml)
        self.set_schema(self.schema)

    def to_string(self, elem, pretty_print=False, encoding=None):
        """
        Convert an etree element into a string.

        :param elem: etree element to serialize
        :param pretty_print: passed through to ``etree.tostring``
        :param encoding: passed through to ``etree.tostring``
        :return: the serialized element after being passed through ``decode``
        """
        return decode(etree.tostring(elem, pretty_print=pretty_print,
                                     encoding=encoding))

    def break_processing_instructions(self, string, dic):
        """
        Break a processing instruction string apart and add it to a dict.

        The ``<?`` and ``?>`` markers are removed, then the text is split at
        the first space into the PI name and the remaining data. The name is
        made unique with :meth:`_create_unique_key` before being stored.

        :param string: A processing instruction as a string
        :param dic: The dictionary to save the PIs to
        :return: ``dic``, with the new entry added
        """
        pi_string = string.replace("<?", "").replace("?>", "")
        # partition() tolerates a PI without a data part (e.g. "<?name?>"),
        # in which case the stored value is an empty string.
        pi_name, _, attr = pi_string.partition(" ")
        if not pi_name:
            # Nothing usable in this fragment.
            return dic
        new_pi_name = self._create_unique_key(dic, pi_name)
        dic[new_pi_name] = attr
        return dic

    def set_processing_instructions(self):
        """
        Take out all processing instructions and create a dictionary from them
        If there is a default encoding, the value is also saved

        Walks the siblings that precede the root element. An ``xml`` entry,
        if present, is handed to :meth:`set_encoding` and removed from the
        dictionary. The result replaces ``self.processing_instructions``.
        """
        dic = {}
        # Without a parsed root there is nothing to walk.
        proc_instr = None
        if self.xmlroot is not None:
            proc_instr = self.xmlroot.getprevious()
        while proc_instr is not None:
            pi_string = self.to_string(proc_instr)
            # One serialized node may hold several PIs separated by
            # "?>\n<?"; split() yields a single item when it does not.
            for item in pi_string.split("?>\n<?"):
                dic = self.break_processing_instructions(item, dic)
            proc_instr = proc_instr.getprevious()
        if 'xml' in dic:
            self.set_encoding(dic['xml'])
            del dic['xml']
        self.processing_instructions = dic

    def set_encoding(self, attr_str):
        """
        Find the encoding in the xml declaration and save it as a string

        The value is stored in ``self.encoding`` without its surrounding
        quotes; ``self.encoding`` is set to None if no encoding attribute
        is found.

        :param attr_str: All attributes as a string
            e.g. "foo1="bar1" foo2="bar2" foo3="bar3" ... foo_n="bar_n""
        """
        # Drop whitespace around "=" so each attribute is a single token.
        normalized = attr_str.replace(" =", "=").replace("= ", "=")
        for item in normalized.split():
            name, sep, value = item.partition("=")
            if sep and name.lower() == "encoding":
                self.encoding = value.strip("\"'")
                return
        self.encoding = None

    def _create_unique_key(self, dictionary, name, numb=0):
        """
        Create a unique key value for any dictionary to prevent overwriting

        If ``name`` is already a key, ``_<n>`` is appended with increasing
        ``n`` until an unused key is found. The full original name is kept
        as the base, including any underscores it contains.

        :param dictionary: A dictionary with any number of entries
        :param name: The index of the item to be added to dictionary
        :param numb: The number to be appended to the name, starts at 0
        :return: ``name`` itself if unused, otherwise the first free
            ``name_<n>``
        """
        base = name
        while name in dictionary:
            numb += 1
            name = "{0}_{1}".format(base, numb)
        return name

    def create_tree(self, root):
        """
        Create an element tree for processing from an etree element

        :param root: etree Element(s)
        :return: an ``etree.ElementTree`` wrapping ``root``
        """
        return etree.ElementTree(root)

    def create_element_from_string(self, xml_string):
        """
        Create an element from an XML string

        :param xml_string: A string of xml
        :return: the element produced by ``etree.fromstring``
        """
        return etree.fromstring(xml_string)

    def create_element(self, name, attrib=None, nsmap=None):
        """
        Create an XML element for writing to file

        :param name: The name of the element to be created
        :param attrib: dictionary of attributes; defaults to an empty dict
        :param nsmap: namespace mapping passed through to ``etree.Element``
        :return: the new ``etree.Element``
        """
        if attrib is None:
            attrib = {}
        return etree.Element(name, attrib, nsmap)

    def write_text(self, elem, text):
        """
        Write text to an etree Element

        :param elem: etree.Element object
        :param text: text to write to the element
        :return: ``elem``, with its text replaced
        """
        elem.text = text
        return elem

    def write_attribute(self, elem, attr_name, attr_value):
        """
        Write attributes to an Element

        :param elem: etree.Element object
        :param attr_name: attribute name to write
        :param attr_value: attribute value to set
        """
        elem.attrib[attr_name] = attr_value

    def return_processing_instructions(self):
        """
        Get all processing instructions saved when loading the document

        :return: a list of ``etree.ProcessingInstruction`` objects, one per
            entry of ``self.processing_instructions`` (empty if that is None)
        """
        pi_list = []
        if self.processing_instructions is not None:
            for key, value in self.processing_instructions.items():
                pi_list.append(etree.ProcessingInstruction(key, value))
        return pi_list

    def append(self, element, tree):
        """
        Append an etree Element to an ElementTree.

        :param element: etree Element to append
        :param tree: ElementTree object to append to
        :return: ``tree``, after ``element`` has been appended
        """
        # append() itself returns None, so hand back the container instead.
        tree.append(element)
        return tree

    def ebuilder(self, parent, elementname, text=None, attrib=None):
        """
        Use lxml E builder class with arbitrary inputs.

        :param parent: The parent element to append a child to
        :param elementname: The name of the child in string form
        :param text: The element text; converted with ``str``. When None the
            child is created without text.
        :param attrib: A dictionary of attribute names to attribute values
        :return: ``parent``, after the new child has been appended
        """
        if attrib is None:
            attrib = {}
        if text is None:
            # Avoid writing the literal string "None" as element text.
            elem = E(elementname, attrib)
        else:
            elem = E(elementname, attrib, str(text))
        # append() itself returns None, so hand back the parent instead.
        parent.append(elem)
        return parent
Note: See TracBrowser for help on using the repository browser.