source: sasview/src/sas/sascalc/dataloader/loader.py @ 4a8d55c

Branches/tags containing this revision: magnetic_scatt, release-4.2.2, ticket-1009, ticket-1094-headless, ticket-1242-2d-resolution, ticket-1243, ticket-1249, unittest-saveload
Last change on this file since 4a8d55c was 4a8d55c, checked in by krzywon, 15 months ago

Propagate through loader when errors are thrown regardless of the error. Add tests using the same file with different extensions (including deprecated extensions).

  • Property mode set to 100644
File size: 15.6 KB
Line 
1"""
2    File handler to support different file extensions.
3    Uses reflectometer registry utility.
4
5    The default readers are found in the 'readers' sub-module
6    and registered by default at initialization time.
7
8    To add a new default reader, one must register it in
9    the register_readers method found in readers/__init__.py.
10
11    A utility method (find_plugins) is available to inspect
12    a directory (for instance, a user plug-in directory) and
13    look for new readers/writers.
14"""
15#####################################################################
16# This software was developed by the University of Tennessee as part of the
17# Distributed Data Analysis of Neutron Scattering Experiments (DANSE)
18# project funded by the US National Science Foundation.
19# See the license text in license.txt
20# copyright 2008, University of Tennessee
21######################################################################
22
23import os
24import sys
25import logging
26import time
27from zipfile import ZipFile
28
29from sas.sascalc.data_util.registry import ExtensionRegistry
30
31# Default readers are defined in the readers sub-module
32from . import readers
33from .loader_exceptions import NoKnownLoaderException, FileContentsException,\
34    DefaultReaderException
35from .readers import ascii_reader
36from .readers import cansas_reader
37from .readers import cansas_reader_HDF5
38
39logger = logging.getLogger(__name__)
40
41
class Registry(ExtensionRegistry):
    """
    Registry class for file format extensions.
    Readers and writers are supported.

    Readers are stored in ``self.loaders`` and writers in ``self.writers``.
    Both map an extension string to a list of callables (bound ``read`` /
    ``write`` methods) that are tried in list order.

    NOTE(review): ``self.loaders`` and ``self.extensions()`` are not defined
    in this class; presumably they come from ``ExtensionRegistry`` - confirm.
    """
    def __init__(self):
        super(Registry, self).__init__()

        # Writers: extension -> list of write callables
        self.writers = {}

        # List of wildcards (file-dialog filter strings)
        self.wildcards = ['All (*.*)|*.*']

        # Creation time, for testing
        self._created = time.time()

        # Register default readers
        readers.read_associations(self)

    def load(self, path, format=None):
        """
        Call the loader for the file type of path.

        Defaults to the ascii (multi-column), cansas XML, and cansas NeXuS
        readers, tried in that order, if no reader was registered for the
        file's extension or if the registered reader failed.

        :param path: file path
        :param format: explicit extension, to force the use
            of a particular reader
        :return: whatever the first successful reader returns
        :raises NoKnownLoaderException: the last fallback (NXcanSAS) raised
            DefaultReaderException and no registered reader had reported
            an error message
        :raises RuntimeError: a reader reported a FileContentsException
            and no later reader succeeded
        """
        # Gets set to a string if the file has an associated reader that fails
        msg_from_reader = None
        try:
            return super(Registry, self).load(path, format=format)
        except NoKnownLoaderException:
            pass  # Try the ASCII reader
        except FileContentsException as fc_exc:
            # File has an associated reader but it failed.
            # Save the error message to display later, but try the 3 default
            # loaders first.
            msg_from_reader = fc_exc.message
        except Exception:
            # Deliberate best effort: whatever the registered reader raised,
            # still give the default readers a chance.
            pass

        # File has no associated reader, or the associated reader failed.
        # Try the ASCII reader
        try:
            ascii_loader = ascii_reader.Reader()
            return ascii_loader.read(path)
        except NoKnownLoaderException:
            pass  # Try the Cansas XML reader
        except DefaultReaderException:
            pass  # Loader specific error to try the cansas XML reader
        except FileContentsException as e:
            # Only surface this error if the registered reader did not
            # already supply a (more relevant) message.
            if msg_from_reader is None:
                raise RuntimeError(e.message)

        # ASCII reader failed - try CanSAS XML reader
        try:
            cansas_loader = cansas_reader.Reader()
            return cansas_loader.read(path)
        except NoKnownLoaderException:
            pass  # Try the NXcanSAS reader
        except DefaultReaderException:
            pass  # Loader specific error to try the NXcanSAS reader
        except FileContentsException as e:
            if msg_from_reader is None:
                raise RuntimeError(e.message)
        except Exception:
            pass

        # CanSAS XML reader failed - try NXcanSAS reader
        try:
            cansas_nexus_loader = cansas_reader_HDF5.Reader()
            return cansas_nexus_loader.read(path)
        except DefaultReaderException:
            # Use the module logger rather than the root logger so the
            # message is attributed to this module.
            logger.error("No default loader can load the data")
            # No known reader available. Give up and throw an error
            if msg_from_reader is None:
                msg = "\nUnknown data format: {}.\nThe file is not a ".format(path)
                msg += "known format that can be loaded by SasView.\n"
                raise NoKnownLoaderException(msg)
            else:
                # Associated reader and default readers all failed.
                # Show error message from associated reader
                raise RuntimeError(msg_from_reader)
        except FileContentsException as e:
            err_msg = msg_from_reader if msg_from_reader is not None else e.message
            raise RuntimeError(err_msg)

    @staticmethod
    def _put_first_on_sys_path(entry):
        """
        Make sure `entry` is the first item of sys.path without stacking
        a duplicate on top of an identical first item.
        """
        if sys.path[:1] != [entry]:
            sys.path.insert(0, entry)

    def find_plugins(self, dir):
        """
        Find readers in a given directory. This method
        can be used to inspect user plug-in directories to
        find new readers/writers.

        The directory is looked up as given (made absolute), then relative
        to the current working directory, to this module's directory and
        to the parent of sys.path[0]. Every ``.py`` file and every module
        listed in a ``.zip`` file found directly in the directory is
        imported and inspected; import failures are logged, not raised.

        :param dir: directory to search into (the name shadows the builtin
            but is kept because it is part of the public signature)
        :return: number of readers found
        """
        readers_found = 0
        temp_path = os.path.abspath(dir)
        if not os.path.isdir(temp_path):
            temp_path = os.path.join(os.getcwd(), dir)
        if not os.path.isdir(temp_path):
            temp_path = os.path.join(os.path.dirname(__file__), dir)
        if not os.path.isdir(temp_path):
            temp_path = os.path.join(os.path.dirname(sys.path[0]), dir)

        dir = temp_path
        # Check whether the directory exists
        if not os.path.isdir(dir):
            msg = "DataLoader couldn't locate DataLoader plugin folder."
            msg += """ "%s" does not exist""" % dir
            logger.warning(msg)
            return readers_found

        plugin_dir = os.path.abspath(dir)
        for item in os.listdir(dir):
            full_path = os.path.join(dir, item)
            if not os.path.isfile(full_path):
                continue

            # Process python files
            if item.endswith('.py'):
                toks = os.path.splitext(os.path.basename(item))
                try:
                    # Keep the plugin directory at the front of sys.path
                    # for the import, without adding it once per file.
                    self._put_first_on_sys_path(plugin_dir)
                    module = __import__(toks[0], globals(), locals())
                    if self._identify_plugin(module):
                        readers_found += 1
                except Exception as exc:
                    msg = "Loader: Error importing "
                    msg += "%s\n  %s" % (item, exc)
                    logger.error(msg)

            # Process zip files
            elif item.endswith('.zip'):
                try:
                    # Find the modules in the zip file. Use the full path:
                    # the bare file name only resolves when the current
                    # working directory is the plugin directory.
                    with ZipFile(full_path) as zfile:
                        nlist = zfile.namelist()

                    self._put_first_on_sys_path(full_path)
                    for mfile in nlist:
                        try:
                            # Change OS path to python path
                            fullname = mfile.replace('/', '.')
                            fullname = os.path.splitext(fullname)[0]
                            module = __import__(fullname, globals(),
                                                locals(), [""])
                            if self._identify_plugin(module):
                                readers_found += 1
                        except Exception as exc:
                            msg = "Loader: Error importing"
                            msg += " %s\n  %s" % (mfile, exc)
                            logger.error(msg)

                except Exception as exc:
                    msg = "Loader: Error importing "
                    msg += " %s\n  %s" % (item, exc)
                    logger.error(msg)

        return readers_found

    def associate_file_type(self, ext, module):
        """
        Look into a module to find whether it contains a
        Reader class. If so, APPEND it to readers and (potentially)
        to the list of writers for the given extension

        :param ext: file extension [string]
        :param module: module object
        :return: True if a reader was registered, False otherwise
        """
        reader_found = False

        if hasattr(module, "Reader"):
            try:
                # Find supported extensions
                loader = module.Reader()
                if ext not in self.loaders:
                    self.loaders[ext] = []
                # Append the new reader to the list
                self.loaders[ext].append(loader.read)

                reader_found = True

                # Keep track of wildcards
                type_name = module.__name__
                if hasattr(loader, 'type_name'):
                    type_name = loader.type_name

                wcard = "%s files (*%s)|*%s" % (type_name, ext.lower(),
                                                ext.lower())
                if wcard not in self.wildcards:
                    self.wildcards.append(wcard)

                # Check whether writing is supported
                if hasattr(loader, 'write'):
                    if ext not in self.writers:
                        self.writers[ext] = []
                    # Append the new writer to the list
                    self.writers[ext].append(loader.write)

            except Exception as exc:
                msg = "Loader: Error accessing"
                msg += " Reader in %s\n  %s" % (module.__name__, exc)
                logger.error(msg)
        return reader_found

    def associate_file_reader(self, ext, loader):
        """
        Append a reader object to readers

        :param ext: file extension [string]
        :param loader: reader object; its ``read`` attribute is registered
        :return: True if the reader was registered, False otherwise
        """
        reader_found = False

        try:
            # Find supported extensions
            if ext not in self.loaders:
                self.loaders[ext] = []
            # Append the new reader to the list
            self.loaders[ext].append(loader.read)

            reader_found = True

            # Keep track of wildcards
            if hasattr(loader, 'type_name'):
                type_name = loader.type_name

                wcard = "%s files (*%s)|*%s" % (type_name, ext.lower(),
                                                ext.lower())
                if wcard not in self.wildcards:
                    self.wildcards.append(wcard)

        except Exception as exc:
            # Reader instances normally have no __name__; fall back to the
            # class name so that reporting the error cannot itself fail.
            loader_name = getattr(loader, '__name__', type(loader).__name__)
            msg = "Loader: Error accessing Reader "
            msg += "in %s\n  %s" % (loader_name, exc)
            logger.error(msg)
        return reader_found

    def _identify_plugin(self, module):
        """
        Look into a module to find whether it contains a
        Reader class. If so, add it to readers and (potentially)
        to the list of writers.

        Unlike associate_file_type, the extensions come from the reader's
        own ``ext`` attribute and the reader is inserted at the FRONT of
        each list, so it takes priority over readers registered earlier.

        :param module: module object
        :return: True if a reader was registered for at least one extension
        """
        reader_found = False

        if hasattr(module, "Reader"):
            try:
                # Find supported extensions
                loader = module.Reader()
                for ext in loader.ext:
                    if ext not in self.loaders:
                        self.loaders[ext] = []
                    # When finding a reader at run time,
                    # treat this reader as the new default
                    self.loaders[ext].insert(0, loader.read)

                    reader_found = True

                    # Keep track of wildcards
                    type_name = module.__name__
                    if hasattr(loader, 'type_name'):
                        type_name = loader.type_name
                    wcard = "%s files (*%s)|*%s" % (type_name, ext.lower(),
                                                    ext.lower())
                    if wcard not in self.wildcards:
                        self.wildcards.append(wcard)

                # Check whether writing is supported
                if hasattr(loader, 'write'):
                    for ext in loader.ext:
                        if ext not in self.writers:
                            self.writers[ext] = []
                        self.writers[ext].insert(0, loader.write)

            except Exception as exc:
                msg = "Loader: Error accessing Reader"
                msg += " in %s\n  %s" % (module.__name__, exc)
                logger.error(msg)
        return reader_found

    def lookup_writers(self, path):
        """
        Collect the writers registered for every known extension that
        `path` ends with.

        :param path: file path whose ending selects the writers
        :return: list of writer callables, without duplicates
        :raises ValueError: if no writer matches the file type
        """
        # Find matching extensions
        extlist = [ext for ext in self.extensions() if path.endswith(ext)]
        # Sort matching extensions by increasing length.
        # NOTE(review): the original comment said "decreasing order of
        # length" but the sort has always been ascending; the order is kept
        # so that writer priority does not change - confirm the intent.
        extlist.sort(key=len)
        # Combine writers for matching extensions into one list, skipping
        # duplicates and extensions that only have a reader.
        writers = []
        for ext in extlist:
            for writer in self.writers.get(ext, []):
                if writer not in writers:
                    writers.append(writer)
        # Raise an error if there are no matching writers
        if not writers:
            raise ValueError("Unknown file type for " + path)
        # All done
        return writers

    def save(self, path, data, format=None):
        """
        Call the writer for the file type of path.

        Writers are tried in order; the result of the first one that
        succeeds is returned.

        :param path: file path to write to
        :param data: object handed to the writer
        :param format: explicit extension, to force the use of the
            writers registered for it

        Raises ValueError if no writer is available.
        Raises KeyError if format is not available.
        May raise a writer-defined exception if writer fails.
        """
        if format is None:
            writers = self.lookup_writers(path)
        else:
            writers = self.writers[format]
        last_error = None
        for fn in writers:
            try:
                return fn(path, data)
            except Exception as exc:
                # Give other writers a chance to succeed, but remember the
                # failure: a bare ``raise`` after the loop has no active
                # exception to re-raise on Python 3.
                last_error = exc
        # If we get here it is because all writers failed (or none exist)
        if last_error is None:
            raise ValueError("No writer available for " + path)
        raise last_error
373
374
class Loader(object):
    """
    Utility class to use the Registry as a singleton.

    The Registry is created once, when this class is defined, and every
    Loader instance delegates to that same object.
    """
    # Registry instance shared by all Loader objects
    __registry = Registry()

    def associate_file_type(self, ext, module):
        """
        Look into a module to find whether it contains a
        Reader class. If so, append it to readers and (potentially)
        to the list of writers for the given extension

        :param ext: file extension [string]
        :param module: module object
        :return: result of Registry.associate_file_type
        """
        return self.__registry.associate_file_type(ext, module)

    def associate_file_reader(self, ext, loader):
        """
        Append a reader object to readers

        :param ext: file extension [string]
        :param loader: reader object
        :return: result of Registry.associate_file_reader
        """
        return self.__registry.associate_file_reader(ext, loader)

    def load(self, file, format=None):
        """
        Load a file

        :param file: file name (path)
        :param format: specified format to use (optional)
        :return: DataInfo object
        """
        return self.__registry.load(file, format)

    def save(self, file, data, format=None):
        """
        Save a DataInfo object to file

        :param file: file name (path)
        :param data: DataInfo object
        :param format: format to write the data in (optional; when None
            the registry picks the writers from the file name, as
            Registry.save does)
        """
        return self.__registry.save(file, data, format)

    def _get_registry_creation_time(self):
        """
        Internal method used to test the uniqueness
        of the registry object
        """
        return self.__registry._created

    def find_plugins(self, directory):
        """
        Find plugins in a given directory

        :param directory: directory to look into to find new readers/writers
        :return: result of Registry.find_plugins
        """
        return self.__registry.find_plugins(directory)

    def get_wildcards(self):
        """
        Return the list of wildcards
        """
        return self.__registry.wildcards
Note: See TracBrowser for help on using the repository browser.