| 1 | # -*- coding: utf-8 -*-
|
|---|
| 2 | #
|
|---|
| 3 | # Copyright (C) 2005-2023 Edgewall Software
|
|---|
| 4 | # Copyright (C) 2003-2006 Jonas Borgström <jonas@edgewall.com>
|
|---|
| 5 | # Copyright (C) 2004-2006 Christopher Lenz <cmlenz@gmx.de>
|
|---|
| 6 | # Copyright (C) 2005-2007 Christian Boos <cboos@edgewall.org>
|
|---|
| 7 | # All rights reserved.
|
|---|
| 8 | #
|
|---|
| 9 | # This software is licensed as described in the file COPYING, which
|
|---|
| 10 | # you should have received as part of this distribution. The terms
|
|---|
| 11 | # are also available at https://trac.edgewall.org/wiki/TracLicense.
|
|---|
| 12 | #
|
|---|
| 13 | # This software consists of voluntary contributions made by many
|
|---|
| 14 | # individuals. For the exact contribution history, see the revision
|
|---|
| 15 | # history and logs, available at https://trac.edgewall.org/log/.
|
|---|
| 16 | #
|
|---|
| 17 | # Author: Jonas Borgström <jonas@edgewall.com>
|
|---|
| 18 | # Christopher Lenz <cmlenz@gmx.de>
|
|---|
| 19 | # Christian Boos <cboos@edgewall.org>
|
|---|
| 20 |
|
|---|
| 21 | import re
|
|---|
| 22 |
|
|---|
| 23 | from trac.core import *
|
|---|
| 24 | from trac.notification import EMAIL_LOOKALIKE_PATTERN
|
|---|
| 25 |
|
|---|
| 26 |
|
|---|
class WikiParser(Component):
    """Wiki text parser.

    The class body defines the regexp fragments of the wiki syntax and
    assembles them into two ordered rule lists, `_pre_rules` and
    `_post_rules`.  The rules contributed by the syntax providers of the
    `WikiSystem` are inserted between those two lists the first time the
    combined pattern is requested (see `_prepare_rules`).
    """

    # -- Tokens used to keep the wiki regexps below readable.
    #    Names ending in _TOKEN hold regexp source (escaped when needed).

    BOLDITALIC_TOKEN = "'''''"
    BOLD_TOKEN = "'''"
    BOLD_TOKEN_WIKICREOLE = r"\*\*"
    ITALIC_TOKEN = "''"
    ITALIC_TOKEN_WIKICREOLE = "//"
    UNDERLINE_TOKEN = "__"
    STRIKE_TOKEN = "~~"
    SUBSCRIPT_TOKEN = ",,"
    SUPERSCRIPT_TOKEN = r"\^"
    INLINE_TOKEN = "`"  # must be a single char (see P<definition> below)
    STARTBLOCK_TOKEN = r"\{\{\{"
    STARTBLOCK = "{{{"
    ENDBLOCK_TOKEN = r"\}\}\}"
    ENDBLOCK = "}}}"
    BULLET_CHARS = "-*\u2022"

    LINK_SCHEME = r"[a-zA-Z][-a-zA-Z0-9+._]*"  # as per RFC 2396 + '_'
    INTERTRAC_SCHEME = r"[a-zA-Z.+-]*?"  # no digits (for shorthand links)

    QUOTED_STRING = r"'[^']+'|\"[^\"]+\""

    # A shorthand link target is FIRST, optionally followed by MIDDLE* LAST.
    SHREF_TARGET_FIRST = r"[\w/?!#@](?<!_)"  # we don't want "_"
    SHREF_TARGET_MIDDLE = r"(?:\|(?=[^|\s])|[^|<>\s])"
    SHREF_TARGET_LAST = r"[\w/=](?<!_)"  # we don't want "_"

    def _lhref_relative_target(sep):
        """Return the regexp source matching a relative link target.

        The target starts with `/`, `?` or `#`, or with `.` or `..`
        optionally followed by one of those.  `sep` is a character-class
        fragment: the target stops at the first character in `sep` or
        at a `]`.
        """
        return r"[/\?#][^%s\]]*|\.\.?(?:[/\?#][^%s\]]*)?" % (sep, sep)

    LHREF_RELATIVE_TARGET = _lhref_relative_target(r'\s')

    XML_NAME = r"[\w:](?<!\d)[\w:.-]*?"  # See http://www.w3.org/TR/REC-xml/#id

    PROCESSOR = r"(\s*)#\!([\w+-][\w+-/]*)"
    PROCESSOR_PARAM = r'''(?P<proc_pname>[-\w]+)''' \
                      r'''=(?P<proc_pval>".*?"|'.*?'|[-,\w]+)'''

    def _set_anchor(name, sep):
        """Return the regexp source for `=#name`, with an optional label.

        `name` is the regexp for the anchor name and `sep` the regexp
        separating the name from the optional label.
        """
        return r'=#(?P<anchorname>%s)(?:%s(?P<anchorlabel>[^\]]*))?' % \
               (name, sep)

    # -- Sequence of regexps used by the engine.
    #    Order matters: the rules are joined with '|' in list order.

    _pre_rules = [
        # Font styles
        r"(?P<bolditalic>!?%s)" % BOLDITALIC_TOKEN,
        r"(?P<bold>!?%s)" % BOLD_TOKEN,
        r"(?P<bold_wc>!?%s)" % BOLD_TOKEN_WIKICREOLE,
        r"(?P<italic>!?%s)" % ITALIC_TOKEN,
        r"(?P<italic_wc>!?%s)" % ITALIC_TOKEN_WIKICREOLE,
        r"(?P<underline>!?%s)" % UNDERLINE_TOKEN,
        r"(?P<strike>!?%s)" % STRIKE_TOKEN,
        r"(?P<subscript>!?%s)" % SUBSCRIPT_TOKEN,
        r"(?P<superscript>!?%s)" % SUPERSCRIPT_TOKEN,
        r"(?P<inlinecode>!?%s(?P<inline>.*?)%s)" \
        % (STARTBLOCK_TOKEN, ENDBLOCK_TOKEN),
        r"(?P<inlinecode2>!?%s(?P<inline2>.*?)%s)" \
        % (INLINE_TOKEN, INLINE_TOKEN),
    ]

    # Rules provided by IWikiSyntaxProviders will be inserted here

    _post_rules = [
        # WikiCreole line breaks
        r"(?P<linebreak_wc>!?\\\\)",
        # e-mails
        r"(?P<email>!?%s)" % EMAIL_LOOKALIKE_PATTERN,
        # <wiki:Trac bracket links>
        r"(?P<shrefbr>!?<(?P<snsbr>%s):(?P<stgtbr>[^>]+)>)" % LINK_SCHEME,
        # the characters &, < and > (candidates for HTML escaping)
        r"(?P<htmlescape>[&<>])",
        # wiki:TracLinks or intertrac:wiki:TracLinks
        r"(?P<shref>!?((?P<sns>%s):(?P<stgt>%s:(?:%s)|%s|%s(?:%s*%s)?)))" \
        % (LINK_SCHEME, LINK_SCHEME, QUOTED_STRING, QUOTED_STRING,
           SHREF_TARGET_FIRST, SHREF_TARGET_MIDDLE, SHREF_TARGET_LAST),
        # [wiki:TracLinks with optional label] or [/relative label]
        (r"(?P<lhref>!?\[(?:"
         r"(?P<rel>%s)|" % LHREF_RELATIVE_TARGET +  # ./... or /...
         r"(?P<lns>%s):(?P<ltgt>%s:(?:%s)|%s|[^\]\s\%s]*))" %
         (LINK_SCHEME, LINK_SCHEME, QUOTED_STRING, QUOTED_STRING, '\u200b') +
         # wiki:TracLinks or wiki:"trac links" or intertrac:wiki:"trac links"
         r"(?:[\s%s]+(?P<label>%s|[^\]]*))?\])" %
         ('\u200b', QUOTED_STRING)),  # trailing space, optional label
        # [=#anchor] creation
        r"(?P<anchor>!?\[%s\])" % _set_anchor(XML_NAME, r'\s+'),
        # [[macro]] call or [[WikiCreole link]]
        (r"(?P<macrolink>!?\[\[(?:[^]]|][^]])+\]\])"),
        # == heading == #hanchor
        r"(?P<heading>^\s*(?P<hdepth>={1,6})\s(?P<htext>.*?)"
        r"(?P<hanchor>#%s)?\s*$)" % XML_NAME,
        # * list
        r"(?P<list>^(?P<ldepth>\s*)"
        r"(?:[%s]|(?P<lstart>[0-9]+|[a-zA-Z]|[ivxIVX]{1,5})\.)\s)"
        % BULLET_CHARS,
        # definition::
        r"(?P<definition>^\s+"
        r"((?:%s[^%s]*%s|%s(?:%s{,2}[^%s])*?%s|[^%s%s:]|:[^:])+::)(?:\s+|$))"
        % (INLINE_TOKEN, INLINE_TOKEN, INLINE_TOKEN,
           STARTBLOCK_TOKEN, ENDBLOCK[0], ENDBLOCK[0], ENDBLOCK_TOKEN,
           INLINE_TOKEN, STARTBLOCK[0]),
        # |- row separator
        r"(?P<table_row_sep>!?\s*\|-+\s*"
        r"(?P<table_row_params>%s\s*)*)" % PROCESSOR_PARAM,
        # (leading space)
        r"(?P<indent>^(?P<idepth>\s+)(?=\S))",
        # || table ||
        r"(?P<table_cell>!?(?P<table_cell_sep>=?(?:\|\|)+=?)"
        r"(?P<table_cell_last>\s*\\?$)?)",
    ]

    # -- Stand-alone compiled patterns built from the fragments above.

    _processor_re = re.compile(PROCESSOR)
    _startblock_re = re.compile(r"\s*%s(?:%s|\s*$)" %
                                (STARTBLOCK, PROCESSOR))
    _processor_param_re = re.compile(PROCESSOR_PARAM)
    _anchor_re = re.compile(r'[^\w:.-]+', re.UNICODE)

    _macro_re = re.compile(r'''
        (?P<macroname> [\w/+-]+ \?? | \? )     # macro, macro? or ?
        (?: \( (?P<macroargs> .*? ) \) )? $    # optional arguments within ()
    ''', re.VERBOSE)

    _creolelink_re = re.compile(r'''
        (?:
          (?P<rel> %(rel)s )                # rel is "./..." or "/..."
        | (?: (?P<lns> %(scheme)s ) : )?    # lns is the optional "scheme:"
          (?P<ltgt>                         # ltgt is the optional target
            %(scheme)s : (?:%(quoted)s)     # - "scheme:'...quoted..'"
          | %(quoted)s                      # - "'...quoted...'"
          | [^|]+                           # - anything but a '|'
          )?
        )
        \s* (?: \| (?P<label> .* ) )?       # optional label after a '|'
        $
        ''' % {'rel': _lhref_relative_target(r'|'),
               'scheme': LINK_SCHEME,
               'quoted': QUOTED_STRING}, re.VERBOSE)

    _set_anchor_wc_re = re.compile(_set_anchor(XML_NAME, r'\|\s*') + r'$')

    def __init__(self):
        # All four attributes are caches, filled lazily on first access.
        self._compiled_rules = self._link_resolvers = None
        self._helper_patterns = self._external_handlers = None

    @property
    def rules(self):
        """The compiled regexp combining all wiki syntax rules."""
        self._prepare_rules()
        return self._compiled_rules

    @property
    def helper_patterns(self):
        """Names of the named groups nested inside each rule.

        For every rule, this lists all named groups except the first
        one found in it.
        """
        self._prepare_rules()
        return self._helper_patterns

    @property
    def external_handlers(self):
        """Mapping of generated group name (`i0`, `i1`, ...) to the
        handler supplied by a syntax provider for that rule.
        """
        self._prepare_rules()
        return self._external_handlers

    def _prepare_rules(self):
        """Build and cache the combined rule pattern, once.

        The pattern is the alternation of `_pre_rules`, then one rule
        per `(regexp, handler)` pair returned by the syntax providers
        (each wrapped in a group named `i<n>`), then `_post_rules`.
        """
        from trac.wiki.api import WikiSystem
        if self._compiled_rules:
            return

        provided = ((regexp, handler)
                    for provider in WikiSystem(self.env).syntax_providers
                    for regexp, handler in provider.get_wiki_syntax() or [])

        external = {}
        patterns = list(self._pre_rules)
        for index, (regexp, handler) in enumerate(provided):
            group = 'i%d' % index
            external[group] = handler
            patterns.append('(?P<%s>%s)' % (group, regexp))
        patterns.extend(self._post_rules)

        # The first named group of a rule identifies the rule itself;
        # only the remaining ones are recorded as helpers.
        group_name_re = re.compile(r'\?P<([a-z\d_]+)>')
        helper_names = [name
                        for pattern in patterns
                        for name in group_name_re.findall(pattern)[1:]]

        # Compile before touching the caches, so that a failure leaves
        # the instance unprepared rather than half-initialized.
        combined = re.compile('(?:' + '|'.join(patterns) + ')', re.UNICODE)
        self._external_handlers = external
        self._helper_patterns = helper_names
        self._compiled_rules = combined

    @property
    def link_resolvers(self):
        """Mapping of link namespace to handler, collected from the
        syntax providers and cached once it is non-empty.
        """
        if self._link_resolvers:
            return self._link_resolvers
        from trac.wiki.api import WikiSystem
        self._link_resolvers = {
            namespace: handler
            for provider in WikiSystem(self.env).syntax_providers
            for namespace, handler in provider.get_link_resolvers() or []
        }
        return self._link_resolvers

    def parse(self, wikitext):
        """Parse `wikitext` and produce a WikiDOM tree."""
        # obviously still some work to do here ;)
        # (for now the text is handed back unchanged)
        return wikitext
|
|---|
| 227 |
|
|---|
| 228 |
|
|---|
# Matches a token made only of word characters and hyphens; used by
# parse_processor_args() to recognize isolated flag keywords.
_processor_pname_re = re.compile(r'[-\w]+$')
|
|---|
| 230 |
|
|---|
| 231 |
|
|---|
def parse_processor_args(processor_args):
    """Turn a string of processor parameters into a dictionary.

    `name=value` assignments map the name to the value, with one pair
    of matching surrounding quotes removed.  A bare keyword is a `bool`
    flag: `True` as written, `False` when prefixed with "-".  A flag
    overrides an assignment of the same name.

    >>> parse_processor_args('ab="c de -f gh=ij" -')
    {'ab': 'c de -f gh=ij'}

    >>> sorted(parse_processor_args('ab=c de -f gh="ij klmn" p=q-r,s').items())
    [('ab', 'c'), ('de', True), ('f', False), ('gh', 'ij klmn'), ('p', 'q-r,s')]

    >>> args = 'data-name=foo-bar data-true -data-false'
    >>> sorted(parse_processor_args(args).items())
    [('data-false', False), ('data-name', 'foo-bar'), ('data-true', True)]
    """
    # The pattern has two capture groups, so split() yields
    # [text, name, value, text, name, value, ..., text].
    parts = WikiParser._processor_param_re.split(processor_args)

    params = {}
    for name, raw in zip(parts[1::3], parts[2::3]):
        is_quoted = raw[:1] + raw[-1:] in ('""', "''")
        params[name] = raw[1:-1] if is_quoted else raw

    # Whatever lies between the assignments may hold isolated flags.
    for leftover in parts[::3]:
        for word in leftover.split():
            if not _processor_pname_re.match(word):
                continue
            if not word.startswith('-'):
                params[word] = True
            elif len(word) > 1:  # a lone "-" names nothing
                params[word[1:]] = False
    return params
|
|---|