1 | """ |
---|
2 | File extension registry. |
---|
3 | |
---|
4 | This provides routines for opening files based on extension, |
---|
5 | and registers the built-in file extensions. |
---|
6 | """ |
---|
7 | from __future__ import print_function |
---|
8 | |
---|
9 | from sas.sascalc.dataloader.loader_exceptions import NoKnownLoaderException |
---|
10 | |
---|
11 | |
---|
class ExtensionRegistry(object):
    """
    Associate a file loader with an extension.

    Keys that start with a dot (e.g. ``'.zip'``) are treated as file
    extensions; any other key (e.g. ``'cx3'``) is treated as a format name
    that a caller can request explicitly.

    Note that there may be multiple loaders for the same extension.  The
    most recently registered loader is tried first.

    Example: ::

        registry = ExtensionRegistry()

        # Add an association by setting an element
        registry['.zip'] = unzip

        # Multiple extensions for one loader
        registry['.tgz'] = untar
        registry['.tar.gz'] = untar

        # Generic extensions to use after trying more specific extensions;
        # these will be checked after the more specific extensions fail.
        registry['.gz'] = gunzip

        # Multiple loaders for one extension
        registry['.cx'] = cx1
        registry['.cx'] = cx2
        registry['.cx'] = cx3

        # Show registered extensions
        print(registry.extensions())

        # Can also register a format name for explicit control from caller
        registry['cx3'] = cx3
        print(registry.formats())

        # Retrieve loaders for a file name
        registry.lookup('hello.cx') -> [cx3,cx2,cx1]

        # Run loader on a filename
        registry.load('hello.cx') ->
            try:
                return cx3('hello.cx')
            except:
                try:
                    return cx2('hello.cx')
                except:
                    return cx1('hello.cx')

        # Load in a specific format ignoring extension
        registry.load('hello.cx',format='cx3') ->
            return cx3('hello.cx')
    """
    def __init__(self, **kw):
        # Keyword arguments are accepted for compatibility but not used.
        # Maps extension/format name -> list of loaders, newest first.
        self.loaders = {}

    def __setitem__(self, ext, loader):
        """
        Register *loader* for *ext*, ahead of any loaders already
        registered for the same key.
        """
        # Insert at the front so the newest loader is tried first.
        self.loaders.setdefault(ext, []).insert(0, loader)

    def __getitem__(self, ext):
        """
        Return the list of loaders registered for *ext*.

        :raises KeyError: When nothing is registered for *ext*.
        """
        return self.loaders[ext]

    def __contains__(self, ext):
        """
        Return True if at least one loader was registered for *ext*.
        """
        return ext in self.loaders

    def formats(self):
        """
        Return a sorted list of the registered formats.
        """
        return sorted(name for name in self.loaders
                      if not name.startswith('.'))

    def extensions(self):
        """
        Return a sorted list of registered extensions.
        """
        return sorted(name for name in self.loaders
                      if name.startswith('.'))

    def lookup(self, path):
        """
        Return the loaders associated with the file type of path.

        Loaders for longer (more specific) matching extensions come before
        loaders for shorter (more generic) ones; a loader registered under
        several matching extensions appears only once, at its first
        position.

        :param path: Data file path
        :raises ValueError: When no loaders are found for the file.
        :return: List of available readers for the file extension
        """
        # Find matching extensions
        extlist = [ext for ext in self.extensions() if path.endswith(ext)]
        # Sort matching extensions by decreasing order of length so that
        # '.tar.gz' is tried before '.gz'.
        extlist.sort(key=len, reverse=True)
        # Combine loaders for matching extensions into one list, dropping
        # duplicates while keeping the first occurrence of each loader.
        loaders = []
        for ext in extlist:
            for loader in self.loaders[ext]:
                if loader not in loaders:
                    loaders.append(loader)
        # Raise an error if there are no matching extensions
        if not loaders:
            raise ValueError("Unknown file type for " + path)
        return loaders

    def load(self, path, format=None):
        """
        Call the loader for the file type of path.

        Each candidate loader is tried in turn and the result of the first
        one that succeeds is returned.

        :param path: Data file path
        :param format: Optional registered key (format name or extension);
            when given, the file extension of path is ignored.
        :raise NoKnownLoaderException: if no loader is available for the
            file type, or the requested format is not registered.
        May raise a loader-defined exception if every loader fails; the
        exception from the last loader tried is the one raised.
        """
        # Message for NoKnownLoaderException, captured here because the
        # exception name bound by ``except ... as`` does not survive the
        # except clause on Python 3.
        message = "No loader available for " + str(path)
        if format is None:
            try:
                loaders = self.lookup(path)
            except ValueError as exc:
                loaders = []
                message = str(exc)
        else:
            loaders = self.loaders.get(format, [])
            if not loaders:
                message = "Unknown file format " + str(format)

        last_exc = None
        for fn in loaders:
            try:
                return fn(path)
            except Exception as exc:
                # Remember the failure but give other loaders a chance
                # to succeed.
                last_exc = exc

        # If we get here it is because all loaders failed
        if last_exc is not None:
            # File has associated loader(s) and they have all failed
            raise last_exc
        raise NoKnownLoaderException(message)  # raise generic exception
---|