source: sasview/src/sans/dataloader/readers/xml_reader.py @ 92a2ecd

ESS_GUIESS_GUI_DocsESS_GUI_batch_fittingESS_GUI_bumps_abstractionESS_GUI_iss1116ESS_GUI_iss879ESS_GUI_iss959ESS_GUI_openclESS_GUI_orderingESS_GUI_sync_sascalccostrafo411magnetic_scattrelease-4.1.1release-4.1.2release-4.2.2release_4.0.1ticket-1009ticket-1094-headlessticket-1242-2d-resolutionticket-1243ticket-1249ticket885unittest-saveload
Last change on this file since 92a2ecd was ac5b69d, checked in by Jeff Krzywon <jeffery.krzywon@…>, 11 years ago

Ticket #249 fix: Saving and loading projects and analysis is now working.

  • Property mode set to 100644
File size: 9.5 KB
Line 
1"""
2    Generic XML read and write utility
3   
4    Usage: Either extend xml_reader or add as a class variable.
5"""
6############################################################################
7#This software was developed by the University of Tennessee as part of the
8#Distributed Data Analysis of Neutron Scattering Experiments (DANSE)
9#project funded by the US National Science Foundation.
10#If you use DANSE applications to do scientific research that leads to
11#publication, we ask that you acknowledge the use of the software with the
12#following sentence:
13#This work benefited from DANSE software developed under NSF award DMR-0520547.
14#copyright 2008,2009 University of Tennessee
15#############################################################################
16
17from lxml import etree
18from lxml.builder import E
19
# Shared lxml parser handed to every etree.parse() call in this module.
# remove_comments=True drops XML comments from the parsed tree, while
# remove_pis=False keeps processing instructions in it.
PARSER = etree.ETCompatXMLParser(remove_comments=True, remove_pis=False)
21
class XMLreader():
    """
    Generic XML read and write class. Mostly helper functions.
    Makes reading/writing XML a bit easier than calling lxml libraries directly.

    :Dependencies:
        This class requires lxml 2.3 or higher.
    """

    # Path (or file-like object) of the XML document, as given by the caller
    xml = None
    # Parsed document tree returned by etree.parse for ``xml``
    xmldoc = None
    # Root element of ``xmldoc``
    xmlroot = None
    # Path (or file-like object) of the schema, as given by the caller
    schema = None
    # Parsed tree of the schema file
    schemadoc = None
    # Encoding found in the XML declaration, if one was collected
    encoding = None
    # Mapping of processing-instruction name -> attribute string
    processing_instructions = None

    def __init__(self, xml = None, schema = None, root = None):
        """
        Store the XML and schema locations and parse whichever were given

        :param xml: XML file to parse, or None to start empty
        :param schema: Schema file to parse, or None for no validation
        :param root: Forwarded to set_xml_file, which currently ignores it
        """
        self.xml = xml
        self.schema = schema
        self.processing_instructions = {}
        if xml is not None:
            self.set_xml_file(xml, root)
        else:
            self.xmldoc = None
            self.xmlroot = None
        if schema is not None:
            self.set_schema(schema)
        else:
            self.schemadoc = None

    def reader(self):
        """
        Validate the XML file and, if valid, parse it into memory

        :return: The parsed lxml document tree
        :raises etree.XMLSchemaValidateError: If the document does not
            validate against the schema; the message is the first
            validation error reported by find_invalid_xml
        """
        if not self.validate_xml():
            # The exception's first argument is its message. Passing the
            # error text as a second positional argument would be taken as
            # an lxml error log object and fail while raising.
            raise etree.XMLSchemaValidateError(self.find_invalid_xml())
        self.xmldoc = etree.parse(self.xml, parser = PARSER)
        return self.xmldoc

    def set_xml_file(self, xml, root = None):
        """
        Set the XML file and parse

        Parsing is best-effort: on any failure xml, xmldoc and xmlroot are
        all reset to None instead of raising.

        :param xml: The XML file to parse
        :param root: Accepted for interface compatibility; not used here
        """
        try:
            self.xml = xml
            self.xmldoc = etree.parse(self.xml, parser = PARSER)
            self.xmlroot = self.xmldoc.getroot()
        except Exception:
            self.xml = None
            self.xmldoc = None
            self.xmlroot = None

    def set_schema(self, schema):
        """
        Set the schema file and parse

        Parsing is best-effort: on any failure schema and schemadoc are
        reset to None instead of raising.

        :param schema: The schema file to parse
        """
        try:
            self.schema = schema
            self.schemadoc = etree.parse(self.schema, parser = PARSER)
        except Exception:
            self.schema = None
            self.schemadoc = None

    def validate_xml(self):
        """
        Checks to see if the XML file meets the schema

        :return: True when no schema is set, otherwise the result of
            validating the freshly re-parsed document against the schema
        """
        valid = True
        if self.schema is not None:
            self.parse_schema_and_doc()
            schema_check = etree.XMLSchema(self.schemadoc)
            valid = schema_check.validate(self.xmldoc)
        return valid

    def find_invalid_xml(self):
        """
        Finds the first offending element that should not be present in XML file

        :return: The validation error message as a string, or an empty
            string when the document is valid
        """
        first_error = ""
        self.parse_schema_and_doc()
        schema = etree.XMLSchema(self.schemadoc)
        try:
            # assertValid returns nothing on success, so its result must
            # not overwrite the empty-string default.
            schema.assertValid(self.xmldoc)
        except etree.DocumentInvalid as e:
            first_error = str(e)
        return first_error

    def parse_schema_and_doc(self):
        """
        Re-parse both the XML file and the schema file from their stored
        locations, refreshing xmldoc, xmlroot and schemadoc.
        """
        self.set_xml_file(self.xml)
        self.set_schema(self.schema)

    def to_string(self, elem, pp=False, encoding=None):
        """
        Converts an etree element into a string

        :param elem: The etree element to serialize
        :param pp: Passed to lxml as pretty_print
        :param encoding: Passed to lxml as the output encoding
        """
        return etree.tostring(elem, pretty_print = pp, encoding = encoding)

    def break_processing_instructions(self, string, dic):
        """
        Method to break a processing instruction string apart and add to a dict

        :param string: A processing instruction as a string
        :param dic: The dictionary to save the PIs to
        :return: The same dictionary, with the instruction added under a
            unique key; an instruction without data is stored with ""
        """
        pi_string = string.replace("<?", "").replace("?>", "").strip()
        parts = pi_string.split(None, 1)
        if not parts:
            # Nothing left once the delimiters are removed
            return dic
        pi_name = parts[0]
        # A PI may consist of a target only, e.g. "<?target?>"
        attr = parts[1] if len(parts) > 1 else ""
        new_pi_name = self._create_unique_key(dic, pi_name)
        dic[new_pi_name] = attr
        return dic

    def set_processing_instructions(self):
        """
        Take out all processing instructions and create a dictionary from them
        If there is a default encoding, the value is also saved
        """
        dic = {}
        if self.xmlroot is None:
            # No document loaded, so there is nothing to collect
            self.processing_instructions = dic
            return
        proc_instr = self.xmlroot.getprevious()
        while proc_instr is not None:
            pi_string = self.to_string(proc_instr)
            if not isinstance(pi_string, str):
                # lxml returns bytes on Python 3 when no encoding is given
                pi_string = pi_string.decode("utf-8")
            # One serialized chunk may hold several instructions
            for item in pi_string.split("?>\n<?"):
                dic = self.break_processing_instructions(item, dic)
            proc_instr = proc_instr.getprevious()
        if 'xml' in dic:
            self.set_encoding(dic['xml'])
            del dic['xml']
        self.processing_instructions = dic

    def set_encoding(self, attr_str):
        """
        Find the encoding in the xml declaration and save it as a string

        Sets self.encoding to the unquoted value, or to None when no
        encoding attribute is present.

        :param attr_str: All attributes as a string
            e.g. "foo1="bar1" foo2="bar2" foo3="bar3" ... foo_n="bar_n""
        """
        attr_str = attr_str.replace(" = ", "=")
        for item in attr_str.split():
            name, separator, value = item.partition("=")
            if separator and name.lower() == "encoding":
                # Drop the surrounding quotes from the attribute value
                self.encoding = value.strip("\"'")
                return
        self.encoding = None

    def _create_unique_key(self, dictionary, name, numb = 0):
        """
        Create a unique key value for any dictionary to prevent overwriting
        Appends "_<n>" to the name, counting up until the key is unused.

        :param dictionary: A dictionary with any number of entries
        :param name: The index of the item to be added to dictionary
        :param numb: The number to be appended to the name, starts at 0
        :return: name itself if unused, otherwise the first free
            "<name>_<n>" key
        """
        # Keep the full original name as the base so that names which
        # already contain underscores are not truncated.
        base_name = name
        while name in dictionary:
            numb += 1
            name = "{0}_{1}".format(base_name, numb)
        return name

    def create_tree(self, root):
        """
        Create an element tree for processing from an etree element

        :param root: etree Element(s)
        """
        return etree.ElementTree(root)

    def create_element_from_string(self, xml_string):
        """
        Create an element from an XML string

        :param xml_string: A string of xml
        """
        return etree.fromstring(xml_string)

    def create_element(self, name, attrib=None, nsmap=None):
        """
        Create an XML element for writing to file

        :param name: The name of the element to be created
        :param attrib: Dictionary of attributes; defaults to empty
        :param nsmap: Namespace map handed to lxml
        """
        if attrib is None:
            attrib = {}
        return etree.Element(name, attrib, nsmap)

    def write_text(self, elem, text):
        """
        Write text to an etree Element

        :param elem: etree.Element object
        :param text: text to write to the element
        :return: The element that was passed in
        """
        elem.text = text
        return elem

    def write_attribute(self, elem, attr_name, attr_value):
        """
        Write attributes to an Element

        :param elem: etree.Element object
        :param attr_name: attribute name to write
        :param attr_value: attribute value to set
        """
        elem.attrib[attr_name] = attr_value

    def return_processing_instructions(self):
        """
        Get all processing instructions saved when loading the document

        :return: A list of etree.ProcessingInstruction objects, one per
            saved entry; empty when nothing was saved
        """
        pi_list = []
        if self.processing_instructions is not None:
            for key, value in self.processing_instructions.items():
                pi_list.append(etree.ProcessingInstruction(key, value))
        return pi_list

    def append(self, element, tree):
        """
        Append an etree Element to a parent element or tree.

        :param element: etree Element to append
        :param tree: Object to append to
        :return: The object that was appended to
        """
        # append() works in place and returns None, so hand back the
        # parent itself rather than that result.
        tree.append(element)
        return tree

    def ebuilder(self, parent, elementname, text=None, attrib=None):
        """
        Use lxml E builder class with arbitrary inputs.

        :param parent: The parent element to append a child to
        :param elementname: The name of the child in string form
        :param text: The element text
        :param attrib: A dictionary of attribute names to attribute values
        :return: The parent element, with the new child appended
        """
        # NOTE(review): a text of None is written as the literal "None";
        # kept as-is because existing callers may rely on it.
        text = str(text)
        if attrib is None:
            attrib = {}
        elem = E(elementname, attrib, text)
        # append() works in place and returns None, so return the parent
        parent.append(elem)
        return parent
281       
Note: See TracBrowser for help on using the repository browser.