1 | """ |
---|
2 | Convert a restructured text document to html. |
---|
3 | |
---|
4 | Inline math markup can use the *math* directive, or it can use latex |
---|
5 | style *\$expression\$*. Math can be rendered using simple html and |
---|
6 | unicode, or with mathjax. |
---|
7 | """ |
---|
8 | |
---|
9 | import re |
---|
10 | from contextlib import contextmanager |
---|
11 | |
---|
12 | from docutils.core import publish_parts |
---|
13 | from docutils.writers.html4css1 import HTMLTranslator |
---|
14 | from docutils.nodes import SkipNode |
---|
15 | |
---|
16 | |
---|
def rst2html(rst, part="whole", math_output="html"):
    r"""
    Render a restructured text string as html and return one piece of it.

    *rst* is the restructured text source.  Before it is handed to
    docutils, compact fractions such as \frac12 are expanded to braced
    form, \tfrac is rewritten as \frac for the *mathml* and *html* math
    outputs, and latex style *\$expression\$* spans are turned into the
    inline *math* role.

    *math_output* selects how formulas are rendered:

    - html
    - mathml
    - mathjax

    See http://docutils.sourceforge.net/docs/user/config.html#math-output
    for details.

    *part* names the entry of the docutils ``publish_parts`` result to
    return.  The generally useful choices are:

    - whole: the entire html document
    - html_body: document division with title and contents and footer
    - body: contents only

    The remaining parts exist but do not make sense alone::

        subtitle, version, encoding, html_prolog, header, meta,
        html_title, title, stylesheet, html_subtitle, html_body,
        body, head, body_suffix, fragment, docinfo, html_head,
        head_prefix, body_prefix, footer, body_pre_docinfo, whole

    A *part* that is not a key of the result raises KeyError.
    """
    # The spelling of the settings key matters: mathjax only behaves with
    # the underscore form, every other output only with the hyphen form.
    option_name = "math_output" if math_output == "mathjax" else "math-output"
    settings = {option_name: math_output}

    # math2html and mathml do not support \frac12
    text = replace_compact_fraction(rst)

    # mathml, html do not support \tfrac
    if math_output in ("mathml", "html"):
        text = text.replace(r'\tfrac', r'\frac')

    text = replace_dollar(text)

    # Keep docutils system messages out of the rendered document.
    with suppress_html_errors():
        parts = publish_parts(
            source=text,
            writer_name='html',
            settings_overrides=settings,
        )
    return parts[part]
---|
59 | |
---|
@contextmanager
def suppress_html_errors():
    r"""
    Context manager for keeping error reports out of the generated HTML.

    Within the context, system message nodes in the docutils parse tree
    will be ignored.  After the context, the usual behaviour will be
    restored, including when the body of the ``with`` block raises.

    The suppression is implemented by temporarily replacing
    ``HTMLTranslator.visit_system_message`` on the class itself, so it is
    in effect for every translator created while the context is active.
    """
    original_visitor = HTMLTranslator.visit_system_message
    HTMLTranslator.visit_system_message = _skip_node
    try:
        yield None
    finally:
        # Undo the class-level patch unconditionally; without the finally
        # an exception raised inside the with block would leave system
        # messages suppressed for all later conversions.
        HTMLTranslator.visit_system_message = original_visitor
---|
72 | |
---|
def _skip_node(self, node):
    r"""
    Visitor stand-in that unconditionally raises SkipNode.

    suppress_html_errors installs this in place of
    HTMLTranslator.visit_system_message.  Neither argument is used.
    """
    raise SkipNode()
---|
75 | |
---|
76 | |
---|
# A fraction command (\frac, \cfrac, \dfrac or \tfrac) immediately followed
# by two single digits, captured as command, numerator and denominator.
_compact_fraction = re.compile(r"(\\[cdt]?frac)([0-9])([0-9])")


def replace_compact_fraction(content):
    r"""
    Expand compact fractions such as \frac12 into \frac{1}{2}.

    Only the two-single-digit shorthand is rewritten; fractions that
    already use braces, or whose arguments are not digits, are returned
    untouched.  This is needed for latex parsers that do not understand
    the compact form.
    """
    braced_form = r"\1{\2}{\3}"
    return re.sub(_compact_fraction, braced_form, content)
---|
83 | |
---|
84 | |
---|
# Inline math delimited by dollar signs:
#   - the opening $ must be at the start of the text or follow whitespace
#     or an opening parenthesis;
#   - the expression is the shortest run of non-newline characters;
#   - the closing $ must not be preceded by a backslash, and must be at
#     the end of the text or be followed by whitespace or one of . , ; ) \
_dollar = re.compile(r"(?:^|(?<=\s|[(]))[$]([^\n]*?)(?<![\\])[$](?:$|(?=\s|[.,;)\\]))")
# A backslash-escaped dollar sign, i.e. a literal dollar.
_notdollar = re.compile(r"\\[$]")


def replace_dollar(content):
    r"""
    Turn latex style $expression$ spans into the rst inline math role.

    Each matching span becomes :math:`expression`.  Afterwards every
    remaining backslash-escaped dollar is reduced to a plain dollar sign.
    Dollar signs that do not form a matching span (for example prices
    such as $37) are left as they are.
    """
    with_math_roles = _dollar.sub(r":math:`\1`", content)
    return _notdollar.sub("$", with_math_roles)
---|
94 | |
---|
95 | |
---|
def test_dollar():
    r"""
    Check replace_dollar against dollar-delimited math and literal dollars.

    Backslashes in the inputs are written doubled so that no literal
    relies on an invalid escape sequence; the string values are the same
    as with a single backslash before ``$`` or a space.
    """
    # Plain text and simple math spans at the start, middle and end.
    assert replace_dollar(u"no dollar") == u"no dollar"
    assert replace_dollar(u"$only$") == u":math:`only`"
    assert replace_dollar(u"$first$ is good") == u":math:`first` is good"
    assert replace_dollar(u"so is $last$") == u"so is :math:`last`"
    assert replace_dollar(u"and $mid$ too") == u"and :math:`mid` too"
    assert replace_dollar(u"$first$, $mid$, $last$") == u":math:`first`, :math:`mid`, :math:`last`"
    # Escaped dollars become literal dollars and never open or close math.
    assert replace_dollar(u"dollar\\$ escape") == u"dollar$ escape"
    assert replace_dollar(u"dollar \\$escape\\$ too") == u"dollar $escape$ too"
    assert replace_dollar(u"spaces $in the$ math") == u"spaces :math:`in the` math"
    # Backslash-space separators allow math embedded inside a word.
    assert replace_dollar(u"emb\\ $ed$\\ ed") == u"emb\\ :math:`ed`\\ ed"
    # Dollars glued to surrounding word characters are not math.
    assert replace_dollar(u"$first$a") == u"$first$a"
    assert replace_dollar(u"a$last$") == u"a$last$"
    # Currency amounts are left alone.
    assert replace_dollar(u"$37") == u"$37"
    assert replace_dollar(u"($37)") == u"($37)"
    assert replace_dollar(u"$37 - $43") == u"$37 - $43"
    assert replace_dollar(u"($37, $38)") == u"($37, $38)"
    assert replace_dollar(u"a $mid$dle a") == u"a $mid$dle a"
    # Math directly inside parentheses is recognised.
    assert replace_dollar(u"a ($in parens$) a") == u"a (:math:`in parens`) a"
    assert replace_dollar(u"a (again $in parens$) a") == u"a (again :math:`in parens`) a"
---|
116 | |
---|
# Run the replace_dollar self-test when this file is executed as a script.
if __name__ == "__main__":
    test_dollar()
---|