source: sasview/src/sas/sascalc/dataloader/readers/xml_reader.py @ 146c669

ESS_GUIESS_GUI_DocsESS_GUI_batch_fittingESS_GUI_bumps_abstractionESS_GUI_iss1116ESS_GUI_iss879ESS_GUI_iss959ESS_GUI_openclESS_GUI_orderingESS_GUI_sync_sascalcmagnetic_scattrelease-4.2.2ticket-1009ticket-1094-headlessticket-1242-2d-resolutionticket-1243ticket-1249ticket885unittest-saveload
Last change on this file since 146c669 was 6a455cd3, checked in by krzywon, 7 years ago

Merge branch 'master' into 4_1_issues

# Conflicts:
# docs/sphinx-docs/source/conf.py
# src/sas/sascalc/dataloader/readers/cansas_reader.py
# src/sas/sasgui/guiframe/documentation_window.py
# src/sas/sasgui/perspectives/fitting/models.py

  • Property mode set to 100644
File size: 9.9 KB
Line 
1"""
2    Generic XML read and write utility
3
4    Usage: Either extend xml_reader or add as a class variable.
5"""
6############################################################################
7#This software was developed by the University of Tennessee as part of the
8#Distributed Data Analysis of Neutron Scattering Experiments (DANSE)
9#project funded by the US National Science Foundation.
10#If you use DANSE applications to do scientific research that leads to
11#publication, we ask that you acknowledge the use of the software with the
12#following sentence:
13#This work benefited from DANSE software developed under NSF award DMR-0520547.
14#copyright 2008,2009 University of Tennessee
15#############################################################################
16
17import logging
18from lxml import etree
19from lxml.builder import E
20
# Logger for this module, named after the module itself.
logger = logging.getLogger(__name__)

# Parser handed to the etree.parse() calls below: XML comments are dropped
# while processing instructions are kept.
PARSER = etree.ETCompatXMLParser(
    remove_comments=True,
    remove_pis=False,
)
24
class XMLreader():
    """
    Generic XML read and write class. Mostly helper functions.
    Makes reading/writing XML a bit easier than calling lxml libraries directly.

    :Dependencies:
        This class requires lxml 2.3 or higher.
    """

    # Source handed to set_xml_file() / set_xml_string()
    xml = None
    # Parsed document (the raw string when set_xml_string() was used)
    xmldoc = None
    # Root element of the working document
    xmlroot = None
    # Source handed to set_schema()
    schema = None
    # Parsed schema document
    schemadoc = None
    # Encoding found by set_encoding(), or None
    encoding = None
    # Dictionary of processing instructions: target name -> data string
    processing_instructions = None

    def __init__(self, xml=None, schema=None):
        """
        Store the XML and schema sources and parse whichever is supplied.

        :param xml: XML source passed on to set_xml_file, or None
        :param schema: schema source passed on to set_schema, or None
        """
        self.xml = xml
        self.schema = schema
        self.processing_instructions = {}
        if xml is not None:
            self.set_xml_file(xml)
        else:
            self.xmldoc = None
            self.xmlroot = None
        if schema is not None:
            self.set_schema(schema)
        else:
            self.schemadoc = None

    def reader(self):
        """
        Validate the XML file, parse it into memory and return the parsed
        document.

        :return: the parsed document, also stored in self.xmldoc
        :raises etree.XMLSchemaValidateError: if validate_xml() fails; the
            exception message is the report from find_invalid_xml()
        """
        if not self.validate_xml():
            # The report is the exception message. The second positional
            # argument of lxml errors is an error log, not a message.
            raise etree.XMLSchemaValidateError(self.find_invalid_xml())
        self.xmldoc = etree.parse(self.xml, parser=PARSER)
        return self.xmldoc

    def set_xml_file(self, xml):
        """
        Set the XML file and parse

        A syntax error is logged and leaves no parsed document behind; any
        other failure also clears self.xml. Nothing is raised.

        :param xml: source passed to etree.parse
        """
        try:
            self.xml = xml
            self.xmldoc = etree.parse(self.xml, parser=PARSER)
            self.xmlroot = self.xmldoc.getroot()
        except etree.XMLSyntaxError as xml_error:
            logger.info(xml_error)
            # Do not keep the tree of a previously loaded file around.
            self.xmldoc = None
            self.xmlroot = None
        except Exception:
            self.xml = None
            self.xmldoc = None
            self.xmlroot = None

    def set_xml_string(self, tag_soup):
        """
        Set an XML string as the working XML.

        A syntax error is logged and leaves no parsed root behind; any other
        failure also clears self.xml. Nothing is raised.

        :param tag_soup: XML formatted string
        """
        try:
            self.xml = tag_soup
            # NOTE: xmldoc holds the raw string here, not a parsed tree.
            self.xmldoc = tag_soup
            self.xmlroot = etree.fromstring(tag_soup)
        except etree.XMLSyntaxError as xml_error:
            logger.info(xml_error)
            # Do not keep the root of a previously loaded document around.
            self.xmldoc = None
            self.xmlroot = None
        except Exception:
            self.xml = None
            self.xmldoc = None
            self.xmlroot = None

    def set_schema(self, schema):
        """
        Set the schema file and parse

        A syntax error is logged and leaves no parsed schema behind; any
        other failure also clears self.schema. Nothing is raised.

        :param schema: source passed to etree.parse
        """
        try:
            self.schema = schema
            self.schemadoc = etree.parse(self.schema, parser=PARSER)
        except etree.XMLSyntaxError as xml_error:
            logger.info(xml_error)
            # Do not keep a previously loaded schema around.
            self.schemadoc = None
        except Exception:
            self.schema = None
            self.schemadoc = None

    def validate_xml(self):
        """
        Checks to see if the XML file meets the schema

        Both the document and the schema are re-parsed first.

        :return: result of the schema validation, or True when no schema
            is set
        """
        if self.schema is None:
            return True
        self.parse_schema_and_doc()
        schema_check = etree.XMLSchema(self.schemadoc)
        return schema_check.validate(self.xmldoc)

    def find_invalid_xml(self):
        """
        Finds the first offending element that should not be present in XML file

        :return: the validation error as a string; an empty string when the
            document is valid or when the error mentions "##other"
        """
        self.parse_schema_and_doc()
        schema = etree.XMLSchema(self.schemadoc)
        try:
            # assertValid() only signals through the exception; its return
            # value carries no information.
            schema.assertValid(self.xmldoc)
        except etree.DocumentInvalid as err:
            message = str(err)
            # Suppress errors for <'any'> elements
            if "##other" in message:
                return ""
            return message
        return ""

    def parse_schema_and_doc(self):
        """
        Re-parse the stored XML and schema sources.
        """
        self.set_xml_file(self.xml)
        self.set_schema(self.schema)

    def to_string(self, elem, pretty_print=False, encoding=None):
        """
        Converts an etree element into a string

        :param elem: etree element to serialise
        :param pretty_print: passed through to etree.tostring
        :param encoding: passed through to etree.tostring
        :return: whatever etree.tostring returns for these arguments
        """
        return etree.tostring(elem, pretty_print=pretty_print,
                              encoding=encoding)

    def break_processing_instructions(self, string, dic):
        """
        Method to break a processing instruction string apart and add to a dict

        The "<?" and "?>" markers and surrounding whitespace are removed; the
        first word becomes the key (made unique with _create_unique_key) and
        the remainder the value. An instruction without data is stored with
        an empty string; an empty instruction is ignored.

        :param string: A processing instruction as a string
        :param dic: The dictionary to save the PIs to
        :return: dic, updated in place
        """
        pi_string = string.replace("<?", "").replace("?>", "").strip()
        parts = pi_string.split(None, 1)
        if not parts:
            return dic
        pi_name = parts[0]
        attr = parts[1] if len(parts) > 1 else ""
        dic[self._create_unique_key(dic, pi_name)] = attr
        return dic

    def set_processing_instructions(self):
        """
        Take out all processing instructions and create a dictionary from them
        If there is a default encoding, the value is also saved

        Walks the siblings preceding the root element. When no root element
        is loaded the dictionary is left empty.
        """
        dic = {}
        proc_instr = None
        if self.xmlroot is not None:
            proc_instr = self.xmlroot.getprevious()
        while proc_instr is not None:
            pi_string = self.to_string(proc_instr)
            if not isinstance(pi_string, str):
                # Serialised bytes must be text before str operations.
                pi_string = pi_string.decode("utf-8")
            # The serialised text may hold several instructions in a row.
            for item in pi_string.split("?>\n<?"):
                dic = self.break_processing_instructions(item, dic)
            proc_instr = proc_instr.getprevious()
        if 'xml' in dic:
            self.set_encoding(dic['xml'])
            del dic['xml']
        self.processing_instructions = dic

    def set_encoding(self, attr_str):
        """
        Find the encoding in the xml declaration and save it as a string

        The value is stored in self.encoding without its surrounding quotes;
        self.encoding is None when no encoding attribute is present.

        :param attr_str: All attributes as a string
            e.g. "foo1="bar1" foo2="bar2" foo3="bar3" ... foo_n="bar_n""
        """
        attr_str = attr_str.replace(" = ", "=")
        for item in attr_str.split():
            name, sep, value = item.partition("=")
            if sep and name.lower() == "encoding":
                self.encoding = value.strip("\"'")
                return
        self.encoding = None

    def _create_unique_key(self, dictionary, name, numb=0):
        """
        Create a unique key value for any dictionary to prevent overwriting
        Appends "_<n>" with increasing n until an unused key is found.

        :param dictionary: A dictionary with any number of entries
        :param name: The index of the item to be added to dictionary
        :param numb: The number to be appended to the name, starts at 0
        :return: name itself when unused, otherwise "<name>_<n>"
        """
        # Membership test, so that a key holding None still counts as taken.
        if name not in dictionary:
            return name
        # Keep the full name as the base; names may contain underscores.
        candidate = name
        while candidate in dictionary:
            numb += 1
            candidate = "{0}_{1}".format(name, numb)
        return candidate

    def create_tree(self, root):
        """
        Create an element tree for processing from an etree element

        :param root: etree Element(s)
        :return: etree.ElementTree built on root
        """
        return etree.ElementTree(root)

    def create_element_from_string(self, xml_string):
        """
        Create an element from an XML string

        :param xml_string: A string of xml
        :return: the element returned by etree.fromstring
        """
        return etree.fromstring(xml_string)

    def create_element(self, name, attrib=None, nsmap=None):
        """
        Create an XML element for writing to file

        :param name: The name of the element to be created
        :param attrib: attributes for the element, defaults to none
        :param nsmap: namespace map passed to etree.Element
        :return: the new etree.Element
        """
        if attrib is None:
            attrib = {}
        return etree.Element(name, attrib, nsmap)

    def write_text(self, elem, text):
        """
        Write text to an etree Element

        :param elem: etree.Element object
        :param text: text to write to the element
        :return: elem
        """
        elem.text = text
        return elem

    def write_attribute(self, elem, attr_name, attr_value):
        """
        Write attributes to an Element

        :param elem: etree.Element object
        :param attr_name: attribute name to write
        :param attr_value: attribute value to set
        """
        elem.attrib[attr_name] = attr_value

    def return_processing_instructions(self):
        """
        Get all processing instructions saved when loading the document

        :return: list with one etree.ProcessingInstruction per entry of
            self.processing_instructions (empty when that is None)
        """
        if self.processing_instructions is None:
            return []
        return [etree.ProcessingInstruction(key, value)
                for key, value in self.processing_instructions.items()]

    def append(self, element, tree):
        """
        Append an etree Element to a parent.

        :param element: etree Element to append
        :param tree: object to append to; only its append() method is used
        :return: tree, after element has been appended
        """
        # append() works in place and returns None, so hand back the parent.
        tree.append(element)
        return tree

    def ebuilder(self, parent, elementname, text=None, attrib=None):
        """
        Use lxml E builder class with arbitrary inputs.

        :param parent: The parent element to append a child to
        :param elementname: The name of the child in string form
        :param text: The element text; converted with str(), omitted if None
        :param attrib: A dictionary of attribute names to attribute values
        :return: parent, after the new child has been appended
        """
        if attrib is None:
            attrib = {}
        if text is None:
            # No text given: do not write the literal string "None".
            elem = E(elementname, attrib)
        else:
            elem = E(elementname, attrib, str(text))
        # append() works in place and returns None, so hand back the parent.
        parent.append(elem)
        return parent
Note: See TracBrowser for help on using the repository browser.