source: sasview/src/sas/sascalc/dataloader/loader.py @ ee62ed7

magnetic_scatt, release-4.2.2, ticket-1009, ticket-1094-headless, ticket-1242-2d-resolution, ticket-1243, ticket-1249, ticket885, unittest-saveload
Last change on this file since ee62ed7 was dc8d1c2, checked in by Paul Kienzle <pkienzle@…>, 7 years ago

python 3 doesn't support list.sort(cmp=…)

  • Property mode set to 100644
File size: 15.5 KB
Line 
1"""
2    File handler to support different file extensions.
3    Uses reflectometer registry utility.
4
5    The default readers are found in the 'readers' sub-module
6    and registered by default at initialization time.
7
8    To add a new default reader, one must register it in
9    the register_readers method found in readers/__init__.py.
10
11    A utility method (find_plugins) is available to inspect
12    a directory (for instance, a user plug-in directory) and
13    look for new readers/writers.
14"""
15#####################################################################
16# This software was developed by the University of Tennessee as part of the
17# Distributed Data Analysis of Neutron Scattering Experiments (DANSE)
18# project funded by the US National Science Foundation.
19# See the license text in license.txt
20# copyright 2008, University of Tennessee
21######################################################################
22
23import os
24import sys
25import logging
26import time
27from zipfile import ZipFile
28
29from sas.sascalc.data_util.registry import ExtensionRegistry
30
31# Default readers are defined in the readers sub-module
32from . import readers
33from .loader_exceptions import NoKnownLoaderException, FileContentsException,\
34    DefaultReaderException
35from .readers import ascii_reader
36from .readers import cansas_reader
37from .readers import cansas_reader_HDF5
38
39logger = logging.getLogger(__name__)
40
41
42class Registry(ExtensionRegistry):
43    """
44    Registry class for file format extensions.
45    Readers and writers are supported.
46    """
47    def __init__(self):
48        super(Registry, self).__init__()
49
50        # Writers
51        self.writers = {}
52
53        # List of wildcards
54        self.wildcards = ['All (*.*)|*.*']
55
56        # Creation time, for testing
57        self._created = time.time()
58
59        # Register default readers
60        readers.read_associations(self)
61
62    def load(self, path, format=None):
63        """
64        Call the loader for the file type of path.
65
66        :param path: file path
67        :param format: explicit extension, to force the use
68            of a particular reader
69
70        Defaults to the ascii (multi-column), cansas XML, and cansas NeXuS
71        readers if no reader was registered for the file's extension.
72        """
73        # Gets set to a string if the file has an associated reader that fails
74        msg_from_reader = None
75        try:
76            return super(Registry, self).load(path, format=format)
77        #except Exception: raise  # for debugging, don't use fallback loader
78        except NoKnownLoaderException as nkl_e:
79            pass  # Try the ASCII reader
80        except FileContentsException as fc_exc:
81            # File has an associated reader but it failed.
82            # Save the error message to display later, but try the 3 default loaders
83            msg_from_reader = fc_exc.message
84        except Exception:
85            pass
86
87        # File has no associated reader, or the associated reader failed.
88        # Try the ASCII reader
89        try:
90            ascii_loader = ascii_reader.Reader()
91            return ascii_loader.read(path)
92        except DefaultReaderException:
93            pass  # Loader specific error to try the cansas XML reader
94        except FileContentsException as e:
95            if msg_from_reader is None:
96                raise RuntimeError(e.message)
97
98        # ASCII reader failed - try CanSAS xML reader
99        try:
100            cansas_loader = cansas_reader.Reader()
101            return cansas_loader.read(path)
102        except DefaultReaderException:
103            pass  # Loader specific error to try the NXcanSAS reader
104        except FileContentsException as e:
105            if msg_from_reader is None:
106                raise RuntimeError(e.message)
107        except Exception:
108            pass
109
110        # CanSAS XML reader failed - try NXcanSAS reader
111        try:
112            cansas_nexus_loader = cansas_reader_HDF5.Reader()
113            return cansas_nexus_loader.read(path)
114        except DefaultReaderException as e:
115            logging.error("No default loader can load the data")
116            # No known reader available. Give up and throw an error
117            if msg_from_reader is None:
118                msg = "\nUnknown data format: {}.\nThe file is not a ".format(path)
119                msg += "known format that can be loaded by SasView.\n"
120                raise NoKnownLoaderException(msg)
121            else:
122                # Associated reader and default readers all failed.
123                # Show error message from associated reader
124                raise RuntimeError(msg_from_reader)
125        except FileContentsException as e:
126            err_msg = msg_from_reader if msg_from_reader is not None else e.message
127            raise RuntimeError(err_msg)
128
129    def find_plugins(self, dir):
130        """
131        Find readers in a given directory. This method
132        can be used to inspect user plug-in directories to
133        find new readers/writers.
134
135        :param dir: directory to search into
136        :return: number of readers found
137        """
138        readers_found = 0
139        temp_path = os.path.abspath(dir)
140        if not os.path.isdir(temp_path):
141            temp_path = os.path.join(os.getcwd(), dir)
142        if not os.path.isdir(temp_path):
143            temp_path = os.path.join(os.path.dirname(__file__), dir)
144        if not os.path.isdir(temp_path):
145            temp_path = os.path.join(os.path.dirname(sys.path[0]), dir)
146
147        dir = temp_path
148        # Check whether the directory exists
149        if not os.path.isdir(dir):
150            msg = "DataLoader couldn't locate DataLoader plugin folder."
151            msg += """ "%s" does not exist""" % dir
152            logger.warning(msg)
153            return readers_found
154
155        for item in os.listdir(dir):
156            full_path = os.path.join(dir, item)
157            if os.path.isfile(full_path):
158
159                # Process python files
160                if item.endswith('.py'):
161                    toks = os.path.splitext(os.path.basename(item))
162                    try:
163                        sys.path.insert(0, os.path.abspath(dir))
164                        module = __import__(toks[0], globals(), locals())
165                        if self._identify_plugin(module):
166                            readers_found += 1
167                    except:
168                        msg = "Loader: Error importing "
169                        msg += "%s\n  %s" % (item, sys.exc_value)
170                        logger.error(msg)
171
172                # Process zip files
173                elif item.endswith('.zip'):
174                    try:
175                        # Find the modules in the zip file
176                        zfile = ZipFile(item)
177                        nlist = zfile.namelist()
178
179                        sys.path.insert(0, item)
180                        for mfile in nlist:
181                            try:
182                                # Change OS path to python path
183                                fullname = mfile.replace('/', '.')
184                                fullname = os.path.splitext(fullname)[0]
185                                module = __import__(fullname, globals(),
186                                                    locals(), [""])
187                                if self._identify_plugin(module):
188                                    readers_found += 1
189                            except:
190                                msg = "Loader: Error importing"
191                                msg += " %s\n  %s" % (mfile, sys.exc_value)
192                                logger.error(msg)
193
194                    except:
195                        msg = "Loader: Error importing "
196                        msg += " %s\n  %s" % (item, sys.exc_value)
197                        logger.error(msg)
198
199        return readers_found
200
201    def associate_file_type(self, ext, module):
202        """
203        Look into a module to find whether it contains a
204        Reader class. If so, APPEND it to readers and (potentially)
205        to the list of writers for the given extension
206
207        :param ext: file extension [string]
208        :param module: module object
209        """
210        reader_found = False
211
212        if hasattr(module, "Reader"):
213            try:
214                # Find supported extensions
215                loader = module.Reader()
216                if ext not in self.loaders:
217                    self.loaders[ext] = []
218                # Append the new reader to the list
219                self.loaders[ext].append(loader.read)
220
221                reader_found = True
222
223                # Keep track of wildcards
224                type_name = module.__name__
225                if hasattr(loader, 'type_name'):
226                    type_name = loader.type_name
227
228                wcard = "%s files (*%s)|*%s" % (type_name, ext.lower(),
229                                                ext.lower())
230                if wcard not in self.wildcards:
231                    self.wildcards.append(wcard)
232
233                # Check whether writing is supported
234                if hasattr(loader, 'write'):
235                    if ext not in self.writers:
236                        self.writers[ext] = []
237                    # Append the new writer to the list
238                    self.writers[ext].append(loader.write)
239
240            except:
241                msg = "Loader: Error accessing"
242                msg += " Reader in %s\n  %s" % (module.__name__, sys.exc_value)
243                logger.error(msg)
244        return reader_found
245
246    def associate_file_reader(self, ext, loader):
247        """
248        Append a reader object to readers
249
250        :param ext: file extension [string]
251        :param module: reader object
252        """
253        reader_found = False
254
255        try:
256            # Find supported extensions
257            if ext not in self.loaders:
258                self.loaders[ext] = []
259            # Append the new reader to the list
260            self.loaders[ext].append(loader.read)
261
262            reader_found = True
263
264            # Keep track of wildcards
265            if hasattr(loader, 'type_name'):
266                type_name = loader.type_name
267
268                wcard = "%s files (*%s)|*%s" % (type_name, ext.lower(),
269                                                ext.lower())
270                if wcard not in self.wildcards:
271                    self.wildcards.append(wcard)
272
273        except:
274            msg = "Loader: Error accessing Reader "
275            msg += "in %s\n  %s" % (loader.__name__, sys.exc_value)
276            logger.error(msg)
277        return reader_found
278
279    def _identify_plugin(self, module):
280        """
281        Look into a module to find whether it contains a
282        Reader class. If so, add it to readers and (potentially)
283        to the list of writers.
284        :param module: module object
285
286        """
287        reader_found = False
288
289        if hasattr(module, "Reader"):
290            try:
291                # Find supported extensions
292                loader = module.Reader()
293                for ext in loader.ext:
294                    if ext not in self.loaders:
295                        self.loaders[ext] = []
296                    # When finding a reader at run time,
297                    # treat this reader as the new default
298                    self.loaders[ext].insert(0, loader.read)
299
300                    reader_found = True
301
302                    # Keep track of wildcards
303                    type_name = module.__name__
304                    if hasattr(loader, 'type_name'):
305                        type_name = loader.type_name
306                    wcard = "%s files (*%s)|*%s" % (type_name, ext.lower(),
307                                                    ext.lower())
308                    if wcard not in self.wildcards:
309                        self.wildcards.append(wcard)
310
311                # Check whether writing is supported
312                if hasattr(loader, 'write'):
313                    for ext in loader.ext:
314                        if ext not in self.writers:
315                            self.writers[ext] = []
316                        self.writers[ext].insert(0, loader.write)
317
318            except:
319                msg = "Loader: Error accessing Reader"
320                msg += " in %s\n  %s" % (module.__name__, sys.exc_value)
321                logger.error(msg)
322        return reader_found
323
324    def lookup_writers(self, path):
325        """
326        :return: the loader associated with the file type of path.
327        :Raises ValueError: if file type is not known.
328        """
329        # Find matching extensions
330        extlist = [ext for ext in self.extensions() if path.endswith(ext)]
331        # Sort matching extensions by decreasing order of length
332        extlist.sort(key=len)
333        # Combine loaders for matching extensions into one big list
334        writers = []
335        for L in [self.writers[ext] for ext in extlist]:
336            writers.extend(L)
337        # Remove duplicates if they exist
338        if len(writers) != len(set(writers)):
339            result = []
340            for L in writers:
341                if L not in result:
342                    result.append(L)
343            writers = L
344        # Raise an error if there are no matching extensions
345        if len(writers) == 0:
346            raise ValueError("Unknown file type for " + path)
347        # All done
348        return writers
349
350    def save(self, path, data, format=None):
351        """
352        Call the writer for the file type of path.
353
354        Raises ValueError if no writer is available.
355        Raises KeyError if format is not available.
356        May raise a writer-defined exception if writer fails.
357        """
358        if format is None:
359            writers = self.lookup_writers(path)
360        else:
361            writers = self.writers[format]
362        for fn in writers:
363            try:
364                return fn(path, data)
365            except Exception:
366                pass  # give other loaders a chance to succeed
367        # If we get here it is because all loaders failed
368        raise  # reraises last exception
369
370
class Loader(object):
    """
    Utility class to use the Registry as a singleton.

    Every method forwards to one Registry instance held as a class
    attribute, so all Loader objects share the same registry.
    """
    # Registry instance, created once when the class body is executed.
    # The double underscore name-mangles it to _Loader__registry.
    __registry = Registry()

    def associate_file_type(self, ext, module):
        """
        Look into a module to find whether it contains a
        Reader class. If so, append it to readers and (potentially)
        to the list of writers for the given extension

        :param ext: file extension [string]
        :param module: module object
        :return: the registry's result for the association
        """
        return self.__registry.associate_file_type(ext, module)

    def associate_file_reader(self, ext, loader):
        """
        Append a reader object to readers

        :param ext: file extension [string]
        :param loader: reader object
        :return: the registry's result for the association
        """
        return self.__registry.associate_file_reader(ext, loader)

    def load(self, file, format=None):
        """
        Load a file

        :param file: file name (path)
        :param format: specified format to use (optional)
        :return: DataInfo object
        """
        return self.__registry.load(file, format)

    def save(self, file, data, format=None):
        """
        Save a DataInfo object to file

        :param file: file name (path)
        :param data: DataInfo object
        :param format: format to write the data in (optional); when left
            as None the registry picks the writers from the file name
        """
        return self.__registry.save(file, data, format)

    def _get_registry_creation_time(self):
        """
        Internal method used to test the uniqueness
        of the registry object
        """
        return self.__registry._created

    def find_plugins(self, directory):
        """
        Find plugins in a given directory

        :param directory: directory to look into to find new readers/writers
        :return: the count reported by the registry's find_plugins
        """
        return self.__registry.find_plugins(directory)

    def get_wildcards(self):
        """
        Return the list of wildcards
        """
        return self.__registry.wildcards
Note: See TracBrowser for help on using the repository browser.