Ticket #574: rst2html.py

File rst2html.py, 4.4 KB (added by pkienzle, 8 years ago)

module to perform rst to html conversion

Line 
1"""
2Convert a restructured text document to html.
3
4Inline math markup can use the *math* directive, or it can use latex
5style *\$expression\$*.  Math is rendered using simple html and unicode,
6not mathjax.
7"""
8
9import re
10from contextlib import contextmanager
11
12from docutils.core import publish_parts
13from docutils.utils.math.math2html import FormulaConfig
14from docutils.writers.html4css1 import HTMLTranslator
15from docutils.nodes import SkipNode
16
17
# CRUFT: docutils html math doesn't support the amstex \tfrac command, so
# register it as an alias for the existing \frac entry when it is missing.
_hybrid_functions = FormulaConfig.hybridfunctions
if u'\\tfrac' not in _hybrid_functions:
    _hybrid_functions[u'\\tfrac'] = _hybrid_functions[u'\\frac']
del _hybrid_functions
21
def rst2html(rst, part="whole", math_output="html"):
    r"""
    Render the restructured text string *rst* as simple html.

    *math_output* selects the formula rendering and is one of:

    - html
    - mathml
    - mathjax

    See http://docutils.sourceforge.net/docs/user/config.html#math-output
    for details.

    *part* names the piece of the docutils output to return.  The useful
    stand-alone choices are:

    - whole: the entire html document
    - html_body: document division with title and contents and footer
    - body: contents only

    The remaining parts exist but do not make sense on their own:

        subtitle, version, encoding, html_prolog, header, meta,
        html_title, title, stylesheet, html_subtitle, html_body,
        body, head, body_suffix, fragment, docinfo, html_head,
        head_prefix, body_prefix, footer, body_pre_docinfo, whole

    Before publishing, compact fractions such as \frac12 are expanded,
    \tfrac is replaced by \frac for mathml output, and latex style
    dollar-delimited math is converted to the *math* role.  System
    messages are kept out of the generated html.
    """
    # Ick! mathjax doesn't work properly with math-output, and the
    # others don't work properly with math_output!
    settings_key = "math_output" if math_output == "mathjax" else "math-output"
    settings = {settings_key: math_output}

    # math2html and mathml do not support \frac12
    text = replace_compact_fraction(rst)

    # mathml does not support \tfrac
    if math_output == "mathml":
        text = text.replace(r'\tfrac', r'\frac')

    text = replace_dollar(text)

    with suppress_html_errors():
        parts = publish_parts(
            source=text,
            writer_name='html',
            settings_overrides=settings,
        )
    return parts[part]
64
@contextmanager
def suppress_html_errors():
    r"""
    Context manager for keeping error reports out of the generated HTML.

    Within the context, *HTMLTranslator.visit_system_message* is replaced
    by *_skip_node*, so system message nodes in the docutils parse tree
    will be ignored.  The replacement is made on the class itself, so it
    applies to every translator used while the context is active.

    On leaving the context the original visitor is restored, even when
    the body of the *with* statement raises an exception.
    """
    original_visitor = HTMLTranslator.visit_system_message
    HTMLTranslator.visit_system_message = _skip_node
    try:
        yield None
    finally:
        # Always undo the monkey patch; without the finally clause an error
        # in the with-body would leave the class permanently patched.
        HTMLTranslator.visit_system_message = original_visitor
77
def _skip_node(self, node):
    r"""
    Visitor that unconditionally raises *SkipNode*.

    Installed in place of *HTMLTranslator.visit_system_message* by
    *suppress_html_errors*; neither *self* nor *node* is inspected.
    """
    raise SkipNode()
80
81
# A \frac, \cfrac, \dfrac or \tfrac command immediately followed by two digits.
_compact_fraction = re.compile(r"(\\[cdt]?frac)([0-9])([0-9])")


def replace_compact_fraction(content):
    r"""
    Expand compact fractions for latex parsers that cannot handle them.

    Every occurrence of a fraction command followed directly by two single
    digits, such as \frac12, is rewritten with explicit braces, giving
    \frac{1}{2}.  All other text in *content* is returned unchanged.
    """
    braced_form = r"\1{\2}{\3}"
    return re.sub(_compact_fraction, braced_form, content)
88
89
# Inline math: an opening "$" at the start of the string or just after
# whitespace or "(", a body that does not cross a newline, and a closing "$"
# that is not preceded by a backslash and sits at the end of the string or
# just before whitespace or one of ". , ; ) \".
_dollar = re.compile(r"(?:^|(?<=\s|[(]))[$]([^\n]*?)(?<![\\])[$](?:$|(?=\s|[.,;)\\]))")
# A backslash-escaped dollar sign.
_notdollar = re.compile(r"\\[$]")


def replace_dollar(content):
    r"""
    Rewrite latex style dollar-delimited math as the rst *math* role.

    Each $expression$ span in *content* becomes :math:`expression`.
    Afterwards every escaped dollar sign (a backslash followed by a dollar)
    is reduced to a plain dollar sign.  The converted text is returned.
    """
    with_math_roles = re.sub(_dollar, r":math:`\1`", content)
    return re.sub(_notdollar, "$", with_math_roles)
99
100
def test_dollar():
    r"""
    Check *replace_dollar* against a table of (input, expected) pairs.

    The cases cover text without math, math at the start, middle and end
    of the text, escaped dollar signs, math inside parentheses, and dollar
    signs that must be left alone because they are not delimited by
    whitespace or punctuation (for example currency amounts).

    Raises AssertionError naming the first failing case.
    """
    # Backslashes are written doubled: "\$" and "\ " are invalid escape
    # sequences in ordinary string literals and trigger warnings on
    # current Python versions.
    cases = [
        (u"no dollar", u"no dollar"),
        (u"$only$", u":math:`only`"),
        (u"$first$ is good", u":math:`first` is good"),
        (u"so is $last$", u"so is :math:`last`"),
        (u"and $mid$ too", u"and :math:`mid` too"),
        (u"$first$, $mid$, $last$", u":math:`first`, :math:`mid`, :math:`last`"),
        (u"dollar\\$ escape", u"dollar$ escape"),
        (u"dollar \\$escape\\$ too", u"dollar $escape$ too"),
        (u"spaces $in the$ math", u"spaces :math:`in the` math"),
        (u"emb\\ $ed$\\ ed", u"emb\\ :math:`ed`\\ ed"),
        (u"$first$a", u"$first$a"),
        (u"a$last$", u"a$last$"),
        (u"$37", u"$37"),
        (u"($37)", u"($37)"),
        (u"$37 - $43", u"$37 - $43"),
        (u"($37, $38)", u"($37, $38)"),
        (u"a $mid$dle a", u"a $mid$dle a"),
        (u"a ($in parens$) a", u"a (:math:`in parens`) a"),
        (u"a (again $in parens$) a", u"a (again :math:`in parens`) a"),
    ]
    for source, expected in cases:
        actual = replace_dollar(source)
        assert actual == expected, (
            "replace_dollar(%r) returned %r, expected %r"
            % (source, actual, expected))
121
# Run the self-test when this file is executed as a script.
if __name__ == '__main__':
    test_dollar()