[14de349] | 1 | """ |
---|
| 2 | Read a relaxed JSON file. |
---|
| 3 | |
---|
| 4 | Relaxed JSON allows comments (introduced by // and going to the end |
---|
| 5 | of the line), optional quotes on key names in dictionaries and optional |
---|
| 6 | trailing commas at the ends of lists and dictionaries. It also strips |
---|
| 7 | the leading characters up to the first '{'. Multiline strings can be |
---|
| 8 | formatted using "\n\" at the end of each line. |
---|
| 9 | |
---|
| 10 | If the file contains e.g., "var _ = {...}", then it can be edited with |
---|
| 11 | a JavaScript aware editor such as VJET for eclipse, and it will be easier |
---|
| 12 | to locate errors and adjust formatting. |
---|
| 13 | """ |
---|
| 14 | import re |
---|
| 15 | import json |
---|
| 16 | from contextlib import contextmanager |
---|
| 17 | |
---|
| 18 | try: |
---|
| 19 | from collections import OrderedDict |
---|
| 20 | except: |
---|
| 21 | from ordered_dict import OrderedDict |
---|
| 22 | |
---|
| 23 | |
---|
| 24 | _LEADING_TEXT = re.compile(r'^.*?[{]', re.DOTALL) |
---|
| 25 | _LINE_CONTINUATION = re.compile(r'\\\s*\n') |
---|
| 26 | _TRAILING_COMMENT = re.compile(r'(?P<comment>\s*//.*?)\n') |
---|
| 27 | _MULTILINE_COMMENT = re.compile(r'(?P<comment>/\*.*?\*/)', re.DOTALL) |
---|
| 28 | _UNQUOTED_FIELDNAME = re.compile(r'(?P<prefix>[,{]\s*)(?P<key>[^\s,{}:"]+)(?P<tail>\s*:)') |
---|
| 29 | _TRAILING_COMMA = re.compile(r',(?P<tail>\s*[]}])') |
---|
| 30 | |
---|
def relaxed_load(path, **kw):
    """
    Read the file at *path* and parse its contents as relaxed JSON.

    All keyword arguments are passed on to relaxed_loads, whose result is
    returned.
    """
    # Close the file as soon as it has been read instead of leaving the
    # handle open until it happens to be garbage collected.
    with open(path) as fid:
        text = fid.read()
    return relaxed_loads(text, **kw)
---|
| 33 | |
---|
| 34 | def relaxed_loads(text, **kw): |
---|
| 35 | """ |
---|
| 36 | Parse and return a relaxed JSON string. |
---|
| 37 | """ |
---|
| 38 | ordered = kw.pop('ordered', False) |
---|
| 39 | if ordered: kw['object_pairs_hook'] = OrderedDict |
---|
| 40 | # TODO: need a little state machine that performs the translation so that |
---|
| 41 | # TODO: line and column numbers are preserved, and so that we can have |
---|
| 42 | # TODO: http:// in a string (instead of it being treated like a comment). |
---|
| 43 | #print "== raw text\n", text |
---|
| 44 | text = _LINE_CONTINUATION.sub('', text) |
---|
| 45 | #print "== joined lines\n", text |
---|
| 46 | text = _TRAILING_COMMENT.sub(r'\n', text) |
---|
| 47 | text = _MULTILINE_COMMENT.sub(r'', text) |
---|
| 48 | #print "== stripped comments\n", text |
---|
| 49 | text = _LEADING_TEXT.sub('{', text) |
---|
| 50 | #print "== trimmed text\n", text |
---|
| 51 | text = _UNQUOTED_FIELDNAME.sub(r'\g<prefix>"\g<key>"\g<tail>', text) |
---|
| 52 | #print "== quoted field names\n", text |
---|
| 53 | text = _TRAILING_COMMA.sub(r'\g<tail>', text) |
---|
| 54 | #print "== processed text\n", text |
---|
| 55 | try: |
---|
| 56 | obj = json.loads(text, object_hook=decode_dict_as_str, **kw) |
---|
| 57 | except ValueError, e: |
---|
| 58 | msg = [str(e)] |
---|
| 59 | M = re.findall('line ([0-9]*) column ([0-9]*)', msg[0]) |
---|
| 60 | if M: |
---|
| 61 | line,col = int(M[0][0]), int(M[0][1]) |
---|
| 62 | lines = text.split("\n") |
---|
| 63 | if line>=2: msg.append(lines[line-2]) |
---|
| 64 | if line>=1: msg.append(lines[line-1]) |
---|
| 65 | msg.append(" "*(col-1) + "^") |
---|
| 66 | if line<len(lines): msg.append(lines[line]) |
---|
| 67 | msg = "\n".join(msg) |
---|
| 68 | raise e.__class__(msg) |
---|
| 69 | return obj |
---|
| 70 | |
---|
@contextmanager
def float_format(formatstr='.15g'):
    """
    Allow the float format to be changed for a json encoding action.

    This is a context manager, and should be used for example as::

        >>> with float_format('.2g'):
        ...     print json.dumps(sqrt(2))
        1.4

    While the block runs, ``json.encoder.FLOAT_REPR`` is replaced by a
    function that formats its argument with *formatstr*.  The previous
    value is put back when the block exits, including when it exits with
    an exception.

    NOTE(review): whether ``json.dumps`` consults ``FLOAT_REPR`` depends on
    the json implementation of the running interpreter -- confirm on the
    targeted Python versions.
    """
    formatter = json.encoder.FLOAT_REPR
    json.encoder.FLOAT_REPR = lambda o: format(o, formatstr)
    try:
        yield
    finally:
        # Always undo the module-level patch so that a failure inside the
        # block cannot leave the altered format in place.
        json.encoder.FLOAT_REPR = formatter
---|
| 86 | |
---|
def numpy_encoder(o):
    """
    JSON encoder for numpy data.

    To automatically convert numpy data to lists when writing a datastream
    use json.dumps(object, default=numpy_encoder).

    Returns ``o.tolist()``.  Raises TypeError, naming the offending object,
    when that call fails with AttributeError (for instance because *o* has
    no ``tolist`` method).
    """
    try:
        return o.tolist()
    except AttributeError:
        # Include the object in the message so the unserializable value can
        # be identified from the traceback.
        raise TypeError(repr(o) + " is not JSON serializable")
---|
| 98 | |
---|
| 99 | |
---|
| 100 | def _decode_list(lst): |
---|
| 101 | newlist = [] |
---|
| 102 | for i in lst: |
---|
| 103 | if isinstance(i, unicode): |
---|
| 104 | i = i.encode('utf-8') |
---|
| 105 | elif isinstance(i, list): |
---|
| 106 | i = _decode_list(i) |
---|
| 107 | newlist.append(i) |
---|
| 108 | return newlist |
---|
| 109 | |
---|
def decode_dict_as_str(dct):
    """
    Return a plain dict built from *dct* with unicode encoded as UTF-8.

    Unicode keys and unicode values are encoded to UTF-8, list values are
    converted with ``_decode_list``, and anything else is copied as is.
    relaxed_loads passes this function to ``json.loads`` as ``object_hook``.
    """
    def to_str(item, allow_list):
        if isinstance(item, unicode):
            return item.encode('utf-8')
        if allow_list and isinstance(item, list):
            return _decode_list(item)
        return item

    return dict(
        (to_str(key, False), to_str(value, True))
        for key, value in dct.iteritems()
    )
---|
| 121 | |
---|
| 122 | |
---|
def test():
    """
    Verify that the translation from pseudo-JSON to JSON works.

    Parses one well-formed relaxed JSON sample and checks a few of the
    resulting values, then checks that a sample with a missing comma makes
    relaxed_loads raise ValueError.  Raises Exception if the broken sample
    is accepted.
    """
    good = """\
// This is a source definition with no errors
var entry = {
field : { // A comment about the field
"field" : "te\\
x\\
t",
other$field : 56,
},
/*
multiline comment
*/
secondfield : {
content: ["string", "string"], /* a second comment */
content: [{name:"good", value:3, URL:"http:\\/\\/my.url.com"},]
},
}
"""
    broken = """\
// This is a source definition with a missing comma
{
field : { // A comment about the field
field : "te\\
x\\
t"
other$field : 56,
},
/*
multiline comment
*/
secondfield : {
content: ["string", "string"], /* a second comment */
content: [{name:"good", value:3},]
},
}
"""
    result = relaxed_loads(good)
    assert result['field']['field'] == "text"
    assert result['field']['other$field'] == 56
    assert result['secondfield']['content'][0]['name'] == 'good'
    # The exception object itself is not needed, so it is not bound.
    try:
        relaxed_loads(broken)
    except ValueError:
        pass
    else:
        raise Exception("No exception raised in broken")
---|
| 170 | |
---|
# Run the self-test when this module is executed as a script.
if __name__ == "__main__":
    test()
---|