source: sasview/src/sans/dataloader/readers/xml_reader.py @ 4e9f227

ESS_GUIESS_GUI_DocsESS_GUI_batch_fittingESS_GUI_bumps_abstractionESS_GUI_iss1116ESS_GUI_iss879ESS_GUI_iss959ESS_GUI_openclESS_GUI_orderingESS_GUI_sync_sascalccostrafo411magnetic_scattrelease-4.1.1release-4.1.2release-4.2.2release_4.0.1ticket-1009ticket-1094-headlessticket-1242-2d-resolutionticket-1243ticket-1249ticket885unittest-saveload
Last change on this file since 4e9f227 was 2e3b055, checked in by Jeff Krzywon <jeffery.krzywon@…>, 10 years ago

Transitioned from minidom to lxml for writing xml for neater output. XMLreader now has helper methods used to read and write xml data. Methods were taken from cansasReader to separate generic XML functions from cansas specific ones.

  • Property mode set to 100644
File size: 8.6 KB
Line 
1"""
2    Generic XML read and write utility
3   
4    Usage: Either extend xml_reader or add as a class variable.
5"""
6############################################################################
7#This software was developed by the University of Tennessee as part of the
8#Distributed Data Analysis of Neutron Scattering Experiments (DANSE)
9#project funded by the US National Science Foundation.
10#If you use DANSE applications to do scientific research that leads to
11#publication, we ask that you acknowledge the use of the software with the
12#following sentence:
13#This work benefited from DANSE software developed under NSF award DMR-0520547.
14#copyright 2008,2009 University of Tennessee
15#############################################################################
16
17from lxml import etree
18from lxml.builder import E
19parser = etree.ETCompatXMLParser(remove_comments=True, remove_pis=False)
20
21class XMLreader():
22   
23    xml = None
24    xmldoc = None
25    xmlroot = None
26    schema = None
27    schemadoc = None
28    encoding = None
29    processingInstructions = None
30   
31    def __init__(self, xml = None, schema = None, root = None):
32        self.xml = xml
33        self.schema = schema
34        self.processingInstructions = {}
35        if xml is not None:
36            self.setXMLFile(xml, root)
37        else:
38            self.xmldoc = None
39            self.xmlroot = None
40        if schema is not None:
41            self.setSchema(schema)
42        else:
43            self.schemadoc = None
44   
45    def reader(self):
46        """
47        Read in an XML file into memory and return an lxml dictionary
48        """
49        if self.validateXML():
50            self.xmldoc = etree.parse(self.xml, parser = parser)
51        else:
52            raise etree.ValidationError(self, self.findInvalidXML())
53        return self.xmldoc
54   
55    def setXMLFile(self, xml, root = None):
56        """
57        Set the XML file and parse
58        """
59        try:
60            self.xml = xml
61            self.xmldoc = etree.parse(self.xml, parser = parser)
62            self.xmlroot = self.xmldoc.getroot()
63        except Exception:
64            self.xml = None
65            self.xmldoc = None
66            self.xmlroot = None
67   
68    def setSchema(self, schema):
69        """
70        Set the schema file and parse
71        """
72        try:
73            self.schema = schema
74            self.schemadoc = etree.parse(self.schema, parser = parser)
75        except Exception:
76            self.schema = None
77            self.schemadoc = None
78   
79    def validateXML(self):
80        """
81        Checks to see if the XML file meets the schema
82        """
83        valid = True
84        if self.schema is not None:
85            self.parseSchemaAndDoc()
86            schemaCheck = etree.XMLSchema(self.schemadoc)
87            valid = schemaCheck.validate(self.xmldoc)
88        return valid
89   
90    def findInvalidXML(self):
91        """
92        Finds the first offending element that should not be present in XML file
93        """
94        firstError = ""
95        self.parseSchemaAndDoc()
96        schema = etree.XMLSchema(self.schemadoc)
97        try:
98            firstError = schema.assertValid(self.xmldoc)
99        except etree.DocumentInvalid as e:
100            firstError = str(e)
101        return firstError
102   
103    def parseSchemaAndDoc(self):
104        """
105        Creates a dictionary of the parsed schema and xml files.
106        """
107        self.setXMLFile(self.xml)
108        self.setSchema(self.schema)
109       
110    def toString(self, elem, pp=False, encoding=None):
111        """
112        Converts an etree element into a string
113        """
114        return etree.tostring(elem, pretty_print = pp, encoding = encoding)
115   
116    def break_processing_instructions(self, string, dic):
117        """
118        Method to break a processing instruction string apart and add to a dict
119       
120        :param string: A processing instruction as a string
121        :param dic: The dictionary to save the PIs to
122        """
123        pi_string = string.replace("<?", "").replace("?>", "")
124        split = pi_string.split(" ", 1)
125        pi_name = split[0]
126        attr = split[1]
127        new_pi_name = self._create_unique_key(dic, pi_name)
128        dic[new_pi_name] = attr
129        return dic
130   
131    def setProcessingInstructions(self):
132        """
133        Take out all processing instructions and create a dictionary from them
134        If there is a default encoding, the value is also saved
135        """
136        dic = {}
137        pi = self.xmlroot.getprevious()
138        while pi is not None:
139            pi_string = self.toString(pi)
140            if "?>\n<?" in pi_string:
141                pi_string = pi_string.split("?>\n<?")
142            if isinstance(pi_string, str):
143                dic = self.break_processing_instructions(pi_string, dic)
144            elif isinstance(pi_string, list):
145                for item in pi_string:
146                    dic = self.break_processing_instructions(item, dic)
147            pi = pi.getprevious()
148        if 'xml' in dic:
149            self.setEncoding(dic['xml'])
150            del dic['xml']
151        self.processingInstructions = dic
152       
153    def setEncoding(self, attr_str):
154        """
155        Find the encoding in the xml declaration and save it as a string
156       
157        :param attr_str: All attributes as a string
158            e.g. "foo1="bar1" foo2="bar2" foo3="bar3" ... foo_n="bar_n""
159        """
160        attr_str = attr_str.replace(" = ", "=")
161        attr_list = attr_str.split( )
162        for item in attr_list:
163            name_value = item.split("\"=")
164            name = name_value[0].lower()
165            value = name_value[1]
166            if name == "encoding":
167                self.encoding = value
168                return
169        self.encoding = None
170       
171    def _create_unique_key(self, dictionary, name, i = 0):
172        """
173        Create a unique key value for any dictionary to prevent overwriting
174        Recurses until a unique key value is found.
175       
176        :param dictionary: A dictionary with any number of entries
177        :param name: The index of the item to be added to dictionary
178        :param i: The number to be appended to the name, starts at 0
179        """
180        if dictionary.get(name) is not None:
181            i += 1
182            name = name.split("_")[0]
183            name += "_{0}".format(i)
184            name = self._create_unique_key(dictionary, name, i)
185        return name
186   
187    def create_tree(self, root):
188        """
189        Create an element tree for processing from an etree element
190       
191        :param root: etree Element(s)
192        """
193        return etree.ElementTree(root)
194   
195    def create_element_from_string(self, s):
196        """
197        Create an element from an XML string
198       
199        :param s: A string of xml
200        """
201        return etree.fromstring(s)
202   
203    def create_element(self, name, attrib={}, nsmap=None):
204        """
205        Create an XML element for writing to file
206       
207        :param name: The name of the element to be created
208        """
209        return etree.Element(name, attrib, nsmap)
210   
211    def write_text(self, elem, text):
212        """
213        Write text to an etree Element
214       
215        :param elem: etree.Element object
216        :param text: text to write to the element
217        """
218        elem.text = text
219        return elem
220   
221    def write_attribute(self, elem, attr_name, attr_value):
222        """
223        Write attributes to an Element
224       
225        :param elem: etree.Element object
226        :param attr_name: attribute name to write
227        :param attr_value: attribute value to set
228        """
229        attr = elem.attrib
230        attr[attr_name] = attr_value
231       
232    def return_processing_instructions(self):
233        """
234        Get all processing instructions saved when loading the document
235       
236        :param tree: etree.ElementTree object to write PIs to
237        """
238        pi_list = []
239        for key in self.processingInstructions:
240            value = self.processingInstructions.get(key)
241            pi = etree.ProcessingInstruction(key, value)
242            pi_list.append(pi)
243        return pi_list
244   
245    def append(self, element, tree):
246        """
247        Append an etree Element to an ElementTree.
248       
249        :param element: etree Element to append
250        :param tree: ElementTree object to append to
251        """
252        tree = tree.append(element)
253        return tree
254   
255    def ebuilder(self, parent, elementname, text=None, attrib={}):
256        text = str(text)
257        elem = E(elementname, attrib, text)
258        parent = parent.append(elem)
259        return parent
260       
Note: See TracBrowser for help on using the repository browser.