# -*- coding: utf-8 -*- # # Copyright (C) 2005-2020 Edgewall Software # Copyright (C) 2003-2006 Jonas Borgström # Copyright (C) 2004-2006 Christopher Lenz # Copyright (C) 2005-2007 Christian Boos # All rights reserved. # # This software is licensed as described in the file COPYING, which # you should have received as part of this distribution. The terms # are also available at https://trac.edgewall.org/wiki/TracLicense. # # This software consists of voluntary contributions made by many # individuals. For the exact contribution history, see the revision # history and logs, available at https://trac.edgewall.org/log/. # # Author: Jonas Borgström # Christopher Lenz # Christian Boos import re from trac.core import * from trac.notification import EMAIL_LOOKALIKE_PATTERN class WikiParser(Component): """Wiki text parser.""" # Some constants used for clarifying the Wiki regexps: BOLDITALIC_TOKEN = "'''''" BOLD_TOKEN = "'''" BOLD_TOKEN_WIKICREOLE = r"\*\*" ITALIC_TOKEN = "''" ITALIC_TOKEN_WIKICREOLE = "//" UNDERLINE_TOKEN = "__" STRIKE_TOKEN = "~~" SUBSCRIPT_TOKEN = ",," SUPERSCRIPT_TOKEN = r"\^" INLINE_TOKEN = "`" # must be a single char (see P below) STARTBLOCK_TOKEN = r"\{\{\{" STARTBLOCK = "{{{" ENDBLOCK_TOKEN = r"\}\}\}" ENDBLOCK = "}}}" BULLET_CHARS = u"-*\u2022" LINK_SCHEME = r"[a-zA-Z][-a-zA-Z0-9+._]*" # as per RFC 2396 + '_' INTERTRAC_SCHEME = r"[a-zA-Z.+-]*?" # no digits (for shorthand links) QUOTED_STRING = r"'[^']+'|\"[^\"]+\"" SHREF_TARGET_FIRST = r"[\w/?!#@](?\s])" SHREF_TARGET_LAST = r"[\w/=](?[-\w]+)''' \ r'''=(?P".*?"|'.*?'|[-,\w]+)''' def _set_anchor(name, sep): return r'=#(?P%s)(?:%s(?P[^\]]*))?' % \ (name, sep) # Sequence of regexps used by the engine _pre_rules = [ # Font styles r"(?P!?%s)" % BOLDITALIC_TOKEN, r"(?P!?%s)" % BOLD_TOKEN, r"(?P!?%s)" % BOLD_TOKEN_WIKICREOLE, r"(?P!?%s)" % ITALIC_TOKEN, r"(?P!?%s)" % ITALIC_TOKEN_WIKICREOLE, r"(?P!?%s)" % UNDERLINE_TOKEN, r"(?P!?%s)" % STRIKE_TOKEN, r"(?P!?%s)" % SUBSCRIPT_TOKEN, r"(?P!?%s)" % SUPERSCRIPT_TOKEN, r"(?P!?%s(?P.*?)%s)" \ % (STARTBLOCK_TOKEN, ENDBLOCK_TOKEN), r"(?P!?%s(?P.*?)%s)" \ % (INLINE_TOKEN, INLINE_TOKEN), ] # Rules provided by IWikiSyntaxProviders will be inserted here _post_rules = [ # WikiCreole line breaks r"(?P!?\\\\)", # e-mails r"(?P!?%s)" % EMAIL_LOOKALIKE_PATTERN, # r"(?P!?<(?P%s):(?P[^>]+)>)" % LINK_SCHEME, # &, < and > to &, < and > r"(?P[&<>])", # wiki:TracLinks or intertrac:wiki:TracLinks r"(?P!?((?P%s):(?P%s:(?:%s)|%s|%s(?:%s*%s)?)))" \ % (LINK_SCHEME, LINK_SCHEME, QUOTED_STRING, QUOTED_STRING, SHREF_TARGET_FIRST, SHREF_TARGET_MIDDLE, SHREF_TARGET_LAST), # [wiki:TracLinks with optional label] or [/relative label] (r"(?P!?\[(?:" r"(?P%s)|" % LHREF_RELATIVE_TARGET + # ./... or /... r"(?P%s):(?P%s:(?:%s)|%s|[^\]\s\%s]*))" % (LINK_SCHEME, LINK_SCHEME, QUOTED_STRING, QUOTED_STRING, u'\u200b') + # wiki:TracLinks or wiki:"trac links" or intertrac:wiki:"trac links" r"(?:[\s%s]+(?P