| 1 | # -*- coding: utf-8 -*- |
|---|
| 2 | |
|---|
| 3 | """ |
|---|
| 4 | WikiCreole to HTML converter |
|---|
| 5 | This program is an example of how the creole.py WikiCreole parser |
|---|
| 6 | can be used. |
|---|
| 7 | |
|---|
| 8 | Copyright (c) 2007, Radomir Dopieralski <creole@sheep.art.pl> |
|---|
| 9 | :copyleft: 2008 by the PyLucid team, see AUTHORS for more details. |
|---|
| 10 | |
|---|
| 11 | PyLucid Updates by the PyLucid team: |
|---|
| 12 | - Bugfixes and better html code style |
|---|
| 13 | - Add a passthrough for all django template blocktags |
|---|
| 14 | - Add a passthrough for html code lines |
|---|
| 15 | |
|---|
| 16 | All rights reserved. |
|---|
| 17 | |
|---|
| 18 | Redistribution and use in source and binary forms, with or without |
|---|
| 19 | modification, are permitted provided that the following conditions |
|---|
| 20 | are met: |
|---|
| 21 | |
|---|
| 22 | * Redistributions of source code must retain the above copyright |
|---|
| 23 | notice, this list of conditions and the following disclaimer. |
|---|
| 24 | |
|---|
| 25 | * Redistributions in binary form must reproduce the above copyright |
|---|
| 26 | notice, this list of conditions and the following disclaimer in |
|---|
| 27 | the documentation and/or other materials provided with the |
|---|
| 28 | distribution. |
|---|
| 29 | |
|---|
| 30 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
|---|
| 31 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
|---|
| 32 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS |
|---|
| 33 | FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
|---|
| 34 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
|---|
| 35 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED |
|---|
| 36 | TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
|---|
| 37 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
|---|
| 38 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
|---|
| 39 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
|---|
| 40 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|---|
| 41 | """ |
|---|
| 42 | |
|---|
| 43 | import sys, re, traceback |
|---|
| 44 | |
|---|
| 45 | from creole import Parser |
|---|
| 46 | |
|---|
| 47 | import macros |
|---|
| 48 | |
|---|
| 49 | from PyLucid.tools.utils import escape |
|---|
| 50 | |
|---|
class Rules:
    """Regular-expression fragments describing link and image targets."""

    # URL schemes treated as external addresses, as a regex alternation.
    proto = '|'.join((
        'http', 'https', 'ftp', 'nntp', 'news',
        'mailto', 'telnet', 'file', 'irc',
    ))

    # "<scheme>:<rest>" -- captures the whole address and its scheme.
    extern = r'(?P<extern_addr>(?P<extern_proto>' + proto + r'):.*)'

    # "<WikiName>:<page>" -- laid out with whitespace, so it is only
    # meaningful when compiled in verbose (re.X) mode.
    interwiki = r'''
            (?P<inter_wiki> [A-Z][a-zA-Z]+ ) :
            (?P<inter_page> .* )
        '''
|---|
| 59 | |
|---|
class HtmlEmitter:
    """
    Generate HTML output for the document
    tree consisting of DocNodes.

    Every node kind is rendered by the method named ``<kind>_emit``;
    kinds without such a method end up in ``default_emit``.
    """

    # Recognises link/image targets: an external address ("<proto>:...")
    # or an interwiki reference ("<WikiName>:<page>").
    addr_re = re.compile('|'.join([
            Rules.extern,
            Rules.interwiki,
        ]), re.X | re.U) # for addresses

    def __init__(self, root, verbose=1, stderr=sys.stderr):
        """
        root    - root node of the document tree to render
        verbose - 0: errors produce no output,
                  1: errors are rendered inline as "[Error: ...]",
                  >1: additionally write tracebacks to `stderr` and add
                      details to some error messages
        stderr  - file-like object receiving the tracebacks
        """
        self.root = root
        self.verbose = verbose
        self.stderr = stderr

    def get_text(self, node):
        """Try to emit whatever text is in the node."""
        try:
            return node.children[0].content or ''
        except (AttributeError, IndexError, TypeError):
            # No usable first child: fall back to the node's own content.
            return node.content or ''

    def html_escape(self, text):
        """Escape `text` for use as HTML character data (see `escape`)."""
        return escape(text)

    def attr_escape(self, text):
        """
        Escape `text` for use inside a double-quoted HTML attribute value.
        """
        # The value is always emitted between double quotes, so a literal
        # '"' has to become an entity or it would terminate the attribute.
        return self.html_escape(text).replace('"', '&quot;')

    # *_emit methods for emitting nodes of the document:

    def document_emit(self, node):
        return self.emit_children(node)

    def text_emit(self, node):
        return self.html_escape(node.content)

    def separator_emit(self, node):
        return u'<hr />\n'

    def paragraph_emit(self, node):
        return u'<p>%s</p>\n' % self.emit_children(node)

    def _list_emit(self, node, list_type):
        """
        Render a list (`list_type` "ul"/"ol") or a list item ("li"),
        indented with one tab per `node.level`.
        """
        if node.parent.kind in ("document",):
            # The first list item
            formatter = u''
        else:
            # Nested element: start on a new line.
            formatter = u'\n'

        if list_type == "li":
            # Items keep opening and closing tag on one line.
            formatter += (
                u'%(i)s<%(t)s>%(c)s</%(t)s>'
            )
        else:
            # Lists put the closing tag on its own, indented line.
            formatter += (
                u'%(i)s<%(t)s>%(c)s\n'
                u'%(i)s</%(t)s>'
            )
        return formatter % {
            "i": "\t"*node.level,
            "c": self.emit_children(node),
            "t": list_type,
        }

    def bullet_list_emit(self, node):
        return self._list_emit(node, list_type=u"ul")

    def number_list_emit(self, node):
        return self._list_emit(node, list_type=u"ol")

    def list_item_emit(self, node):
        return self._list_emit(node, list_type=u"li")

    def table_emit(self, node):
        return u'<table>\n%s</table>\n' % self.emit_children(node)

    def table_row_emit(self, node):
        return u'<tr>\n%s</tr>\n' % self.emit_children(node)

    def table_cell_emit(self, node):
        return u'\t<td>%s</td>\n' % self.emit_children(node)

    def table_head_emit(self, node):
        return u'\t<th>%s</th>\n' % self.emit_children(node)

    def emphasis_emit(self, node):
        return u'<i>%s</i>' % self.emit_children(node)

    def strong_emit(self, node):
        return u'<strong>%s</strong>' % self.emit_children(node)

    def header_emit(self, node):
        return u'<h%d>%s</h%d>\n' % (
            node.level, self.html_escape(node.content), node.level)

    def code_emit(self, node):
        return u'<tt>%s</tt>' % self.html_escape(node.content)

    def link_emit(self, node):
        """
        Render a link. The target is `node.content`; the link text is the
        rendered children or, without children, the escaped target.

        Raises NotImplementedError for interwiki targets.
        """
        target = node.content
        if node.children:
            inside = self.emit_children(node)
        else:
            inside = self.html_escape(target)
        m = self.addr_re.match(target)
        if m and m.group('inter_wiki'):
            raise NotImplementedError
        # External addresses and all other targets are rendered alike.
        return u'<a href="%s">%s</a>' % (
            self.attr_escape(target), inside)

    def image_emit(self, node):
        """
        Render an image. The source is `node.content`, the alt text comes
        from `get_text`.

        Raises NotImplementedError for interwiki targets.
        """
        target = node.content
        text = self.get_text(node)
        m = self.addr_re.match(target)
        if m and m.group('inter_wiki'):
            raise NotImplementedError
        # External addresses and all other targets are rendered alike.
        return u'<img src="%s" alt="%s">' % (
            self.attr_escape(target), self.attr_escape(text))

    def macro_emit(self, node):
        """
        Look up `node.macro_name` in the `macros` module, call it with
        args=node.macro_args and text=node.content and return its result.

        A missing macro, an exception raised by the macro or a result
        that is not a unicode string is reported through `self.error`.
        """
        #print node.debug()
        macro_name = node.macro_name
        try:
            macro = getattr(macros, macro_name)
        except AttributeError:
            return self.error(
                u"Macro '%s' doesn't exist" % macro_name,
                handle_traceback = True
            )

        try:
            result = macro(args=node.macro_args, text=node.content)
        except Exception as err:
            # A failing macro must not abort rendering of the whole page.
            return self.error(
                u"Macro '%s' error: %s" % (macro_name, err),
                handle_traceback = True
            )

        if not isinstance(result, unicode):
            msg = u"Macro '%s' doesn't return a unicode string!" % macro_name
            if self.verbose>1:
                msg += " - returns: %r, type %r" % (result, type(result))
            return self.error(msg)

        return result

    def break_emit(self, node):
        """Render a forced line break, re-indenting the following text."""
        if node.parent.kind == "list_item":
            return u"<br />\n" + "\t"*node.parent.level
        elif node.parent.kind in ("table_head", "table_cell"):
            return u"<br />\n\t\t"
        else:
            return u"<br />\n"

    def line_emit(self, node):
        return u"\n"

    def preformatted_emit(self, node):
        return u"<pre>\n%s\n</pre>\n" % self.html_escape(node.content)

    def pass_block_emit(self, node):
        """ Pass-through all django template blocktags and html code lines """
        # Deliberately unescaped: the content is emitted verbatim.
        return node.content + "\n"
    pass_line_emit = pass_block_emit
    html_emit = pass_block_emit

    def pass_inline_emit(self, node):
        """ Pass-through all django template tags """
        # Deliberately unescaped: the content is emitted verbatim.
        return node.content

    def default_emit(self, node):
        """Fallback function for emitting unknown nodes."""
        raise NotImplementedError("Node '%s' unknown" % node.kind)

    def emit_children(self, node):
        """Emit all the children of a node."""
        return u''.join([self.emit_node(child) for child in node.children])

    def emit_node(self, node):
        """Emit a single node."""
        #print "%s_emit: %r" % (node.kind, node.content)
        # Dispatch on the node kind, e.g. kind "table" -> self.table_emit.
        emit = getattr(self, '%s_emit' % node.kind, self.default_emit)
        return emit(node)

    def emit(self):
        """Emit the document represented by self.root DOM tree."""
        return self.emit_node(self.root)

    def error(self, text, handle_traceback=False):
        """
        Error Handling.

        With verbose > 1 and `handle_traceback` set, the current traceback
        is written to self.stderr wrapped in <pre> tags.
        Returns u"[Error: <text>]" if verbose > 0, otherwise u"".
        """
        if self.verbose>1 and handle_traceback:
            self.stderr.write(
                "<pre>%s</pre>" % traceback.format_exc()
            )

        if self.verbose>0:
            return u"[Error: %s]" % text
        else:
            # No error output
            return u""
|---|
| 272 | |
|---|
if __name__ == "__main__":
    # Demo: parse a sample text, dump the parser's debug output and print
    # the HTML generated by HtmlEmitter.

    # Sample 1: general markup (links, bold/italic, images, template tags).
    # Kept for manual testing; assign it to `txt` below to render it.
    markup_sample = r"""== a headline

Here is [[a internal]] link.
This is [[http://domain.tld|external links]].
A [[internal links|different]] link name.

Basics: **bold** or //italic//
or **//both//** or //**both**//
Force\\linebreak.

The current page name: >{{ PAGE.name }}< great?
A {% lucidTag page_update_list count=10 %} PyLucid plugin

{% sourcecode py %}
import sys

sys.stdout("Hello World!")
{% endsourcecode %}

A [[www.domain.tld|link]].
a {{/image.jpg|My Image}} image

no image: {{ foo|bar }}!
picture [[www.domain.tld | {{ foo.JPG | Foo }} ]] as a link

END"""

    # Sample 2: django template block tags, including unbalanced ones.
    blocktag_sample = r"""
==== Headline 1

On {% a tag 1 %} line
line two

==== Headline 2

{% a tag 2 %}

A block:
{% block %}
<Foo:> {{ Bar }}
{% endblock %}
end block

{% block1 arg="jo" %}
eofjwqp
{% endblock1 %}

A block without the right end block:
{% block1 %}
111
{% endblock2 %}
BBB

A block without endblock:
{% block3 %}
222
{% block3 %}
CCC

the end"""

    # The sample that is actually rendered.
    txt = blocktag_sample

    print("-"*80)
    # from creole_alt.creole import Parser
    p = Parser(txt)
    document = p.parse()
    p.debug()

    print(HtmlEmitter(document).emit())
|---|