source: sasview/src/sas/sascalc/dataloader/readers/xml_reader.py @ 5a405bd

ESS_GUI, ESS_GUI_Docs, ESS_GUI_batch_fitting, ESS_GUI_bumps_abstraction, ESS_GUI_iss1116, ESS_GUI_iss879, ESS_GUI_iss959, ESS_GUI_opencl, ESS_GUI_ordering, ESS_GUI_sync_sascalc, magnetic_scatt, release-4.2.2, ticket-1009, ticket-1094-headless, ticket-1242-2d-resolution, ticket-1243, ticket-1249, ticket885, unittest-saveload
Last change on this file since 5a405bd was fafe52a, checked in by lewis, 7 years ago

Refactor Anton Paar SAXS reader to use the FileReader class

  • Property mode set to 100644
File size: 9.9 KB
RevLine 
[5ce7f17]1"""
2    Generic XML read and write utility
3
4    Usage: Either extend xml_reader or add as a class variable.
5"""
6############################################################################
7#This software was developed by the University of Tennessee as part of the
8#Distributed Data Analysis of Neutron Scattering Experiments (DANSE)
9#project funded by the US National Science Foundation.
10#If you use DANSE applications to do scientific research that leads to
11#publication, we ask that you acknowledge the use of the software with the
12#following sentence:
13#This work benefited from DANSE software developed under NSF award DMR-0520547.
14#copyright 2008,2009 University of Tennessee
15#############################################################################
16
17import logging
18from lxml import etree
19from lxml.builder import E
[bc570f4]20from sas.sascalc.dataloader.file_reader_base_class import FileReader
[5ce7f17]21
# Logger named after this module; the XMLreader methods report parse
# problems through it.
logger = logging.getLogger(__name__)

# Single parser instance handed to the etree.parse() calls in this module.
# Configured to drop XML comments (remove_comments=True) while keeping
# processing instructions (remove_pis=False).
PARSER = etree.ETCompatXMLParser(remove_pis=False, remove_comments=True)
25
class XMLreader(FileReader):
    """
    Generic XML read and write class. Mostly helper functions.
    Makes reading/writing XML a bit easier than calling lxml libraries directly.

    :Dependencies:
        This class requires lxml 2.3 or higher.
    """

    # Class-level defaults; __init__ rebinds all of these per instance
    # except ``encoding``, which is only assigned by set_encoding().
    xml = None
    xmldoc = None
    xmlroot = None
    schema = None
    schemadoc = None
    encoding = None
    processing_instructions = None

    def __init__(self, xml=None, schema=None):
        """
        Create a reader, optionally parsing an XML file and a schema file.

        :param xml: XML source handed to set_xml_file(), or None
        :param schema: schema source handed to set_schema(), or None
        """
        self.xml = xml
        self.schema = schema
        self.processing_instructions = {}
        if xml is not None:
            self.set_xml_file(xml)
        else:
            self.xmldoc = None
            self.xmlroot = None
        if schema is not None:
            self.set_schema(schema)
        else:
            self.schemadoc = None

    def reader(self):
        """
        Validate the XML file against the schema (if any), parse it and
        return the parsed document.

        :return: the object returned by etree.parse for self.xml
        :raises etree.XMLSchemaValidateError: if validation fails; the
            message is the text produced by find_invalid_xml()
        """
        if not self.validate_xml():
            # The lxml error constructor takes (message, error_log). The
            # validation text must be the message: passing it in the
            # error_log slot makes lxml call .copy() on a string and fail.
            raise etree.XMLSchemaValidateError(self.find_invalid_xml())
        self.xmldoc = etree.parse(self.xml, parser=PARSER)
        return self.xmldoc

    def set_xml_file(self, xml):
        """
        Set the XML file and parse it.

        XML syntax errors are logged and re-raised. Any other failure is
        logged and swallowed, leaving xml, xmldoc and xmlroot set to None.

        :param xml: XML source passed to etree.parse
        :raises etree.XMLSyntaxError: if the source is not well-formed XML
        """
        try:
            self.xml = xml
            self.xmldoc = etree.parse(self.xml, parser=PARSER)
            self.xmlroot = self.xmldoc.getroot()
        except etree.XMLSyntaxError as xml_error:
            logger.info(xml_error)
            raise
        except Exception as exc:
            # Best effort: record the reason instead of failing silently,
            # then fall back to the "nothing loaded" state.
            logger.info(exc)
            self.xml = None
            self.xmldoc = None
            self.xmlroot = None

    def set_xml_string(self, tag_soup):
        """
        Set an XML string as the working XML.

        Note that xmldoc is set to the raw string itself, not to a parsed
        tree; only xmlroot holds a parsed element.

        :param tag_soup: XML formatted string
        :raises etree.XMLSyntaxError: if the string is not well-formed XML
        :raises Exception: any other parsing failure, after xml, xmldoc and
            xmlroot have been reset to None
        """
        try:
            self.xml = tag_soup
            self.xmldoc = tag_soup
            self.xmlroot = etree.fromstring(tag_soup)
        except etree.XMLSyntaxError as xml_error:
            logger.info(xml_error)
            raise
        except Exception:
            self.xml = None
            self.xmldoc = None
            self.xmlroot = None
            raise

    def set_schema(self, schema):
        """
        Set the schema file and parse it.

        Failures are never raised from here: a syntax error is logged and
        schemadoc is left as it was, any other failure is logged and resets
        schema and schemadoc to None.

        :param schema: schema source passed to etree.parse
        """
        try:
            self.schema = schema
            self.schemadoc = etree.parse(self.schema, parser=PARSER)
        except etree.XMLSyntaxError as xml_error:
            logger.info(xml_error)
        except Exception as exc:
            logger.info(exc)
            self.schema = None
            self.schemadoc = None

    def validate_xml(self):
        """
        Check whether the XML file meets the schema.

        :return: result of the schema validation, or True when no schema
            is set
        """
        if self.schema is None:
            return True
        self.parse_schema_and_doc()
        schema_check = etree.XMLSchema(self.schemadoc)
        return schema_check.validate(self.xmldoc)

    def find_invalid_xml(self):
        """
        Report the first schema violation found in the XML file.

        :return: the validation error message as a string, or an empty
            string if the document is valid
        """
        self.parse_schema_and_doc()
        schema = etree.XMLSchema(self.schemadoc)
        try:
            schema.assertValid(self.xmldoc)
        except etree.DocumentInvalid as err:
            return str(err)
        # assertValid() gives no message on success, so report "no error"
        # explicitly as an empty string.
        return ""

    def parse_schema_and_doc(self):
        """
        Re-parse both the current XML source and the current schema source.
        """
        self.set_xml_file(self.xml)
        self.set_schema(self.schema)

    def to_string(self, elem, pretty_print=False, encoding=None):
        """
        Convert an etree element into a string.

        :param elem: etree element to serialise
        :param pretty_print: forwarded to etree.tostring
        :param encoding: forwarded to etree.tostring
        :return: whatever etree.tostring returns for these arguments
        """
        return etree.tostring(elem, pretty_print=pretty_print,
                              encoding=encoding)

    def break_processing_instructions(self, string, dic):
        """
        Break a processing instruction string apart and add it to a dict.

        The text before the first space becomes the key (made unique with
        _create_unique_key), the remainder becomes the value. An
        instruction without any data is stored with an empty value.

        :param string: A processing instruction as a string
        :param dic: The dictionary to save the PIs to
        :return: the same dictionary, updated in place
        """
        pi_string = string.replace("<?", "").replace("?>", "")
        # partition() never fails when there is no space, unlike indexing
        # the result of split(" ", 1).
        pi_name, _, attr = pi_string.partition(" ")
        new_pi_name = self._create_unique_key(dic, pi_name)
        dic[new_pi_name] = attr
        return dic

    def set_processing_instructions(self):
        """
        Take out all processing instructions that precede the root element
        and create a dictionary from them.
        If an 'xml' entry is found, its encoding is saved via set_encoding
        and the entry is removed from the dictionary.
        """
        dic = {}
        proc_instr = self.xmlroot.getprevious()
        while proc_instr is not None:
            pi_string = self.to_string(proc_instr)
            # etree.tostring may hand back a byte string (it does on
            # Python 3 by default); decode so the text operations below
            # work. On Python 2 bytes is str and this is a no-op.
            if isinstance(pi_string, bytes) and not isinstance(pi_string, str):
                pi_string = pi_string.decode("utf-8")
            # Several instructions can come back in a single string;
            # handle each of them separately.
            if "?>\n<?" in pi_string:
                pieces = pi_string.split("?>\n<?")
            else:
                pieces = [pi_string]
            for item in pieces:
                dic = self.break_processing_instructions(item, dic)
            proc_instr = proc_instr.getprevious()
        if 'xml' in dic:
            self.set_encoding(dic['xml'])
            del dic['xml']
        self.processing_instructions = dic

    def set_encoding(self, attr_str):
        """
        Find the encoding in the xml declaration and save it as a string.
        self.encoding is set to None when no encoding attribute is present.

        :param attr_str: All attributes as a string
            e.g. "foo1="bar1" foo2="bar2" foo3="bar3" ... foo_n="bar_n""
        """
        attr_str = attr_str.replace(" = ", "=")
        for item in attr_str.split():
            # Split name from value at the first '='; tokens without '='
            # are skipped rather than raising.
            name, sep, value = item.partition("=")
            if sep and name.lower() == "encoding":
                # Store the bare value without the surrounding quotes.
                self.encoding = value.strip("\"'")
                return
        self.encoding = None

    def _create_unique_key(self, dictionary, name, numb=0):
        """
        Create a unique key value for any dictionary to prevent overwriting.
        Appends "_<n>" with increasing n until the key is not in use.

        :param dictionary: A dictionary with any number of entries
        :param name: The index of the item to be added to dictionary
        :param numb: The number to be appended to the name, starts at 0
        :return: name itself if unused, otherwise name with a numeric suffix
        """
        # The full original name is kept as the base, so names that
        # themselves contain underscores are not truncated.
        candidate = name
        while dictionary.get(candidate) is not None:
            numb += 1
            candidate = "{0}_{1}".format(name, numb)
        return candidate

    def create_tree(self, root):
        """
        Create an element tree for processing from an etree element.

        :param root: etree Element(s)
        :return: etree.ElementTree wrapping root
        """
        return etree.ElementTree(root)

    def create_element_from_string(self, xml_string):
        """
        Create an element from an XML string.

        :param xml_string: A string of xml
        :return: the element returned by etree.fromstring
        """
        return etree.fromstring(xml_string)

    def create_element(self, name, attrib=None, nsmap=None):
        """
        Create an XML element for writing to file.

        :param name: The name of the element to be created
        :param attrib: dictionary of attributes; defaults to an empty dict
        :param nsmap: namespace map forwarded to etree.Element
        :return: the new etree Element
        """
        if attrib is None:
            attrib = {}
        return etree.Element(name, attrib, nsmap)

    def write_text(self, elem, text):
        """
        Write text to an etree Element.

        :param elem: etree.Element object
        :param text: text to write to the element
        :return: the same element
        """
        elem.text = text
        return elem

    def write_attribute(self, elem, attr_name, attr_value):
        """
        Write attributes to an Element.

        :param elem: etree.Element object
        :param attr_name: attribute name to write
        :param attr_value: attribute value to set
        """
        elem.attrib[attr_name] = attr_value

    def return_processing_instructions(self):
        """
        Get all processing instructions saved when loading the document.

        :return: list of etree.ProcessingInstruction objects, one per entry
            in self.processing_instructions (empty if that is None)
        """
        if self.processing_instructions is None:
            return []
        return [etree.ProcessingInstruction(key, value)
                for key, value in self.processing_instructions.items()]

    def append(self, element, tree):
        """
        Append an etree Element to a parent.

        :param element: etree Element to append
        :param tree: object to append to; its append() method is called
        :return: the object that was appended to
        """
        # append() works in place, so return the parent explicitly rather
        # than the call's result.
        tree.append(element)
        return tree

    def ebuilder(self, parent, elementname, text=None, attrib=None):
        """
        Use lxml E builder class with arbitrary inputs.

        The text is always passed through str(), so None is written as the
        literal text "None".

        :param parent: The parent element to append a child to
        :param elementname: The name of the child in string form
        :param text: The element text
        :param attrib: A dictionary of attribute names to attribute values
        :return: the parent element, with the new child appended
        """
        text = str(text)
        if attrib is None:
            attrib = {}
        elem = E(elementname, attrib, text)
        # append() works in place, so return the parent explicitly rather
        # than the call's result.
        parent.append(elem)
        return parent
Note: See TracBrowser for help on using the repository browser.