[4bae1ef] | 1 | """ |
---|
| 2 | File extension registry. |
---|
| 3 | |
---|
| 4 | This provides routines for opening files based on extension, |
---|
| 5 | and registers the built-in file extensions. |
---|
| 6 | """ |
---|
| 7 | |
---|
[270c882b] | 8 | from sas.sascalc.dataloader.loader_exceptions import NoKnownLoaderException |
---|
| 9 | |
---|
[4bae1ef] | 10 | |
---|
class ExtensionRegistry(object):
    """
    Associate a file loader with an extension.

    Note that there may be multiple loaders for the same extension.
    Keys starting with '.' are treated as file extensions; any other
    key is treated as a named format that callers can request explicitly.

    Example: ::

        registry = ExtensionRegistry()

        # Add an association by setting an element
        registry['.zip'] = unzip

        # Multiple extensions for one loader
        registry['.tgz'] = untar
        registry['.tar.gz'] = untar

        # Generic extensions to use after trying more specific extensions;
        # these will be checked after the more specific extensions fail.
        registry['.gz'] = gunzip

        # Multiple loaders for one extension
        registry['.cx'] = cx1
        registry['.cx'] = cx2
        registry['.cx'] = cx3

        # Show registered extensions
        print(registry.extensions())

        # Can also register a format name for explicit control from caller
        registry['cx3'] = cx3
        print(registry.formats())

        # Retrieve loaders for a file name
        registry.lookup('hello.cx') -> [cx3,cx2,cx1]

        # Run loader on a filename
        registry.load('hello.cx') ->
            try:
                return cx3('hello.cx')
            except:
                try:
                    return cx2('hello.cx')
                except:
                    return cx1('hello.cx')

        # Load in a specific format ignoring extension
        registry.load('hello.cx',format='cx3') ->
            return cx3('hello.cx')
    """
    def __init__(self, **kw):
        # Maps extension (leading '.') or format name -> list of loaders,
        # most recently registered first.
        self.loaders = {}

    def __setitem__(self, ext, loader):
        """
        Register *loader* for *ext*.

        The newest loader is placed at the front of the list so that it
        is tried before loaders registered earlier for the same key.
        """
        if ext not in self.loaders:
            self.loaders[ext] = []
        self.loaders[ext].insert(0, loader)

    def __getitem__(self, ext):
        """
        Return the list of loaders registered for *ext*.

        :raises KeyError: When nothing is registered under *ext*.
        """
        return self.loaders[ext]

    def __contains__(self, ext):
        """
        Return True if at least one loader is registered under *ext*.
        """
        return ext in self.loaders

    def formats(self):
        """
        Return a sorted list of the registered formats.
        """
        return sorted(key for key in self.loaders if not key.startswith('.'))

    def extensions(self):
        """
        Return a sorted list of registered extensions.
        """
        return sorted(key for key in self.loaders if key.startswith('.'))

    def lookup(self, path):
        """
        Return the loaders associated with the file type of path.

        :param path: Data file path
        :raises ValueError: When no loaders are found for the file.
        :return: List of available readers for the file extension, with
            loaders of longer (more specific) extensions first and
            duplicates removed.
        """
        # Find matching extensions
        matches = [ext for ext in self.extensions() if path.endswith(ext)]
        # Most specific (longest) extension first, e.g. '.tar.gz' before '.gz'
        matches.sort(key=len, reverse=True)
        # Combine the loaders of all matching extensions into one list,
        # keeping only the first occurrence of each loader so that the
        # priority order is preserved.
        loaders = []
        for ext in matches:
            for loader in self.loaders[ext]:
                if loader not in loaders:
                    loaders.append(loader)
        # Raise an error if there are no matching extensions
        if not loaders:
            raise ValueError("Unknown file type for " + path)
        return loaders

    def load(self, path, format=None):
        """
        Call the loader for the file type of path.

        Loaders are tried in priority order; the result of the first one
        that does not raise is returned.

        :param path: Data file path
        :param format: Optional registered key; when given, only the
            loaders registered under that key are tried and the file
            extension is ignored.
        :raise NoKnownLoaderException: if no loader is registered for the
            path/format, or if every candidate loader failed. The message
            is taken from the last error encountered.
        """
        last_error = None
        loaders = []
        if format is None:
            try:
                loaders = self.lookup(path)
            except ValueError as exc:
                last_error = exc
        else:
            try:
                loaders = self.loaders[format]
            except KeyError as exc:
                last_error = exc
        for loader in loaders:
            try:
                return loader(path)
            except Exception as exc:
                # Remember the failure and give other loaders a chance
                last_error = exc
        # If we get here it is because no loader was found or all failed
        if last_error is None:
            message = "No loader available for " + path
        else:
            # 'message' only exists on Python 2 exceptions; fall back to str()
            message = getattr(last_error, "message", None) or str(last_error)
        raise NoKnownLoaderException(message)  # raise generic exception
---|