Edgewall Software

source: trunk/trac/wiki/parser.py

Last change on this file was 17657, checked in by Jun Omae, 8 months ago

1.5.4dev: update copyright year to 2023 (refs #13402)

[skip ci]

  • Property svn:eol-style set to native
File size: 9.9 KB
Line 
1# -*- coding: utf-8 -*-
2#
3# Copyright (C) 2005-2023 Edgewall Software
4# Copyright (C) 2003-2006 Jonas Borgström <jonas@edgewall.com>
5# Copyright (C) 2004-2006 Christopher Lenz <cmlenz@gmx.de>
6# Copyright (C) 2005-2007 Christian Boos <cboos@edgewall.org>
7# All rights reserved.
8#
9# This software is licensed as described in the file COPYING, which
10# you should have received as part of this distribution. The terms
11# are also available at https://trac.edgewall.org/wiki/TracLicense.
12#
13# This software consists of voluntary contributions made by many
14# individuals. For the exact contribution history, see the revision
15# history and logs, available at https://trac.edgewall.org/log/.
16#
17# Author: Jonas Borgström <jonas@edgewall.com>
18# Christopher Lenz <cmlenz@gmx.de>
19# Christian Boos <cboos@edgewall.org>
20
21import re
22
23from trac.core import *
24from trac.notification import EMAIL_LOOKALIKE_PATTERN
25
26
class WikiParser(Component):
    """Wiki text parser.

    The class body defines the regexp fragments and the ordered rule lists
    (`_pre_rules`, `_post_rules`) from which the main wiki regexp is built.
    The rules contributed by the syntax providers of `WikiSystem` are
    inserted between those two lists the first time the compiled rules are
    requested, and the result is cached on the instance.
    """

    # Some constants used for clarifying the Wiki regexps:

    BOLDITALIC_TOKEN = "'''''"
    BOLD_TOKEN = "'''"
    BOLD_TOKEN_WIKICREOLE = r"\*\*"
    ITALIC_TOKEN = "''"
    ITALIC_TOKEN_WIKICREOLE = "//"
    UNDERLINE_TOKEN = "__"
    STRIKE_TOKEN = "~~"
    SUBSCRIPT_TOKEN = ",,"
    SUPERSCRIPT_TOKEN = r"\^"
    INLINE_TOKEN = "`"  # must be a single char (see P<definition> below)
    STARTBLOCK_TOKEN = r"\{\{\{"
    STARTBLOCK = "{{{"
    ENDBLOCK_TOKEN = r"\}\}\}"
    ENDBLOCK = "}}}"
    BULLET_CHARS = "-*\u2022"

    LINK_SCHEME = r"[a-zA-Z][-a-zA-Z0-9+._]*"  # as per RFC 2396 + '_'
    INTERTRAC_SCHEME = r"[a-zA-Z.+-]*?"  # no digits (for shorthand links)

    QUOTED_STRING = r"'[^']+'|\"[^\"]+\""

    SHREF_TARGET_FIRST = r"[\w/?!#@](?<!_)"  # we don't want "_"
    SHREF_TARGET_MIDDLE = r"(?:\|(?=[^|\s])|[^|<>\s])"
    SHREF_TARGET_LAST = r"[\w/=](?<!_)"  # we don't want "_"

    def _lhref_relative_target(sep):
        """Return the regexp fragment for a relative link target.

        `sep` is spliced into a negated character class, so it must be a
        character-class fragment naming the characters which end the target
        (in addition to ``]``).

        Called as a plain function while the class body is executed.
        """
        return r"[/\?#][^%s\]]*|\.\.?(?:[/\?#][^%s\]]*)?" % (sep, sep)

    LHREF_RELATIVE_TARGET = _lhref_relative_target(r'\s')

    XML_NAME = r"[\w:](?<!\d)[\w:.-]*?"  # See http://www.w3.org/TR/REC-xml/#id

    PROCESSOR = r"(\s*)#\!([\w+-][\w+-/]*)"
    PROCESSOR_PARAM = r'''(?P<proc_pname>[-\w]+)''' \
                      r'''=(?P<proc_pval>".*?"|'.*?'|[-,\w]+)'''

    def _set_anchor(name, sep):
        """Return the regexp fragment for an ``=#anchor`` definition.

        `name` is the regexp matching the anchor name and `sep` the regexp
        matching what separates the name from the optional anchor label.

        Called as a plain function while the class body is executed.
        """
        return r'=#(?P<anchorname>%s)(?:%s(?P<anchorlabel>[^\]]*))?' % \
               (name, sep)

    # Sequence of regexps used by the engine

    _pre_rules = [
        # Font styles
        r"(?P<bolditalic>!?%s)" % BOLDITALIC_TOKEN,
        r"(?P<bold>!?%s)" % BOLD_TOKEN,
        r"(?P<bold_wc>!?%s)" % BOLD_TOKEN_WIKICREOLE,
        r"(?P<italic>!?%s)" % ITALIC_TOKEN,
        r"(?P<italic_wc>!?%s)" % ITALIC_TOKEN_WIKICREOLE,
        r"(?P<underline>!?%s)" % UNDERLINE_TOKEN,
        r"(?P<strike>!?%s)" % STRIKE_TOKEN,
        r"(?P<subscript>!?%s)" % SUBSCRIPT_TOKEN,
        r"(?P<superscript>!?%s)" % SUPERSCRIPT_TOKEN,
        r"(?P<inlinecode>!?%s(?P<inline>.*?)%s)" \
        % (STARTBLOCK_TOKEN, ENDBLOCK_TOKEN),
        r"(?P<inlinecode2>!?%s(?P<inline2>.*?)%s)" \
        % (INLINE_TOKEN, INLINE_TOKEN),
    ]

    # Rules provided by IWikiSyntaxProviders will be inserted here

    _post_rules = [
        # WikiCreole line breaks
        r"(?P<linebreak_wc>!?\\\\)",
        # e-mails
        r"(?P<email>!?%s)" % EMAIL_LOOKALIKE_PATTERN,
        # <wiki:Trac bracket links>
        r"(?P<shrefbr>!?<(?P<snsbr>%s):(?P<stgtbr>[^>]+)>)" % LINK_SCHEME,
        # &, < and > to &amp;, &lt; and &gt;
        r"(?P<htmlescape>[&<>])",
        # wiki:TracLinks or intertrac:wiki:TracLinks
        r"(?P<shref>!?((?P<sns>%s):(?P<stgt>%s:(?:%s)|%s|%s(?:%s*%s)?)))" \
        % (LINK_SCHEME, LINK_SCHEME, QUOTED_STRING, QUOTED_STRING,
           SHREF_TARGET_FIRST, SHREF_TARGET_MIDDLE, SHREF_TARGET_LAST),
        # [wiki:TracLinks with optional label] or [/relative label]
        (r"(?P<lhref>!?\[(?:"
         r"(?P<rel>%s)|" % LHREF_RELATIVE_TARGET +  # ./... or /...
         r"(?P<lns>%s):(?P<ltgt>%s:(?:%s)|%s|[^\]\s\%s]*))" %
         (LINK_SCHEME, LINK_SCHEME, QUOTED_STRING, QUOTED_STRING, '\u200b') +
         # wiki:TracLinks or wiki:"trac links" or intertrac:wiki:"trac links"
         r"(?:[\s%s]+(?P<label>%s|[^\]]*))?\])" %
         ('\u200b', QUOTED_STRING)),  # trailing space, optional label
        # [=#anchor] creation
        r"(?P<anchor>!?\[%s\])" % _set_anchor(XML_NAME, r'\s+'),
        # [[macro]] call or [[WikiCreole link]]
        (r"(?P<macrolink>!?\[\[(?:[^]]|][^]])+\]\])"),
        # == heading == #hanchor
        r"(?P<heading>^\s*(?P<hdepth>={1,6})\s(?P<htext>.*?)"
        r"(?P<hanchor>#%s)?\s*$)" % XML_NAME,
        # * list
        r"(?P<list>^(?P<ldepth>\s*)"
        r"(?:[%s]|(?P<lstart>[0-9]+|[a-zA-Z]|[ivxIVX]{1,5})\.)\s)"
        % BULLET_CHARS,
        # definition::
        r"(?P<definition>^\s+"
        r"((?:%s[^%s]*%s|%s(?:%s{,2}[^%s])*?%s|[^%s%s:]|:[^:])+::)(?:\s+|$))"
        % (INLINE_TOKEN, INLINE_TOKEN, INLINE_TOKEN,
           STARTBLOCK_TOKEN, ENDBLOCK[0], ENDBLOCK[0], ENDBLOCK_TOKEN,
           INLINE_TOKEN, STARTBLOCK[0]),
        # |- row separator
        r"(?P<table_row_sep>!?\s*\|-+\s*"
        r"(?P<table_row_params>%s\s*)*)" % PROCESSOR_PARAM,
        # (leading space)
        r"(?P<indent>^(?P<idepth>\s+)(?=\S))",
        # || table ||
        r"(?P<table_cell>!?(?P<table_cell_sep>=?(?:\|\|)+=?)"
        r"(?P<table_cell_last>\s*\\?$)?)",
    ]

    _processor_re = re.compile(PROCESSOR)
    _startblock_re = re.compile(r"\s*%s(?:%s|\s*$)" %
                                (STARTBLOCK, PROCESSOR))
    _processor_param_re = re.compile(PROCESSOR_PARAM)
    _anchor_re = re.compile(r'[^\w:.-]+', re.UNICODE)

    _macro_re = re.compile(r'''
        (?P<macroname> [\w/+-]+ \?? | \? )     # macro, macro? or ?
        (?: \( (?P<macroargs> .*? ) \) )? $    # optional arguments within ()
        ''', re.VERBOSE)

    _creolelink_re = re.compile(r'''
        (?:
          (?P<rel> %(rel)s )                # rel is "./..." or "/..."
        | (?: (?P<lns> %(scheme)s ) : )?    # lns is the optional "scheme:"
          (?P<ltgt>                         # ltgt is the optional target
            %(scheme)s : (?:%(quoted)s)     #   - "scheme:'...quoted..'"
          | %(quoted)s                      #   - "'...quoted...'"
          | [^|]+                           #   - anything but a '|'
          )?
        )
        \s* (?: \| (?P<label> .* ) )?       # optional label after a '|'
        $
        ''' % {'rel': _lhref_relative_target(r'|'),
               'scheme': LINK_SCHEME,
               'quoted': QUOTED_STRING}, re.VERBOSE)

    _set_anchor_wc_re = re.compile(_set_anchor(XML_NAME, r'\|\s*') + r'$')

    def __init__(self):
        # Lazily built caches; `None` means "not computed yet".
        self._compiled_rules = None
        self._link_resolvers = None
        self._helper_patterns = None
        self._external_handlers = None

    @property
    def rules(self):
        """The compiled regexp combining all the wiki syntax rules."""
        self._prepare_rules()
        return self._compiled_rules

    @property
    def helper_patterns(self):
        """Names of the named groups nested inside the syntax rules."""
        self._prepare_rules()
        return self._helper_patterns

    @property
    def external_handlers(self):
        """Handlers of the provider supplied rules, keyed by group name."""
        self._prepare_rules()
        return self._external_handlers

    def _prepare_rules(self):
        """Build and cache the compiled rules, helpers and handlers.

        Does nothing once the rules have been compiled.
        """
        if self._compiled_rules is not None:
            return
        # Imported only when the rules actually have to be built.
        from trac.wiki.api import WikiSystem

        handlers = {}
        syntax = self._pre_rules[:]
        provided = (
            entry
            for provider in WikiSystem(self.env).syntax_providers
            for entry in provider.get_wiki_syntax() or []
        )
        # Each provided regexp is wrapped in a group named `i<N>` and its
        # handler is registered under that same name.
        for i, (regexp, handler) in enumerate(provided):
            handlers['i' + str(i)] = handler
            syntax.append('(?P<i%d>%s)' % (i, regexp))
        syntax += self._post_rules

        # For each rule, collect every named group but the first one found.
        helper_re = re.compile(r'\?P<([a-z\d_]+)>')
        helpers = [name
                   for rule in syntax
                   for name in helper_re.findall(rule)[1:]]
        rules = re.compile('(?:' + '|'.join(syntax) + ')', re.UNICODE)

        self._external_handlers = handlers
        self._helper_patterns = helpers
        self._compiled_rules = rules

    @property
    def link_resolvers(self):
        """Mapping of link namespaces to their resolver, built on first use.

        When several providers declare the same namespace, the last one
        returned by `WikiSystem(self.env).syntax_providers` wins.
        """
        # Compare against `None`: an empty mapping is a valid cached result
        # and must not trigger a new query of all the providers.
        if self._link_resolvers is None:
            from trac.wiki.api import WikiSystem
            self._link_resolvers = {
                namespace: handler
                for provider in WikiSystem(self.env).syntax_providers
                for namespace, handler in provider.get_link_resolvers() or []
            }
        return self._link_resolvers

    def parse(self, wikitext):
        """Parse `wikitext` and produce a WikiDOM tree.

        Currently returns `wikitext` unchanged.
        """
        # obviously still some work to do here ;)
        return wikitext
227
228
# A whole word made only of parameter name characters (letters, digits,
# "_" and "-"), i.e. something usable as a flag.
_processor_pname_re = re.compile(r'[-\w]+$')


def parse_processor_args(processor_args):
    """Convert a string of processor parameters into a dictionary.

    ``name=value`` assignments map the name to the value, with one pair of
    enclosing single or double quotes removed. Words standing on their own
    are `bool` flags: `False` when written with a leading "-", `True`
    otherwise. A lone "-" and words containing other characters than those
    allowed in parameter names are skipped.

    >>> parse_processor_args('ab="c de -f gh=ij" -')
    {'ab': 'c de -f gh=ij'}

    >>> sorted(parse_processor_args('ab=c de -f gh="ij klmn" p=q-r,s').items())
    [('ab', 'c'), ('de', True), ('f', False), ('gh', 'ij klmn'), ('p', 'q-r,s')]

    >>> args = 'data-name=foo-bar data-true -data-false'
    >>> sorted(parse_processor_args(args).items())
    [('data-false', False), ('data-name', 'foo-bar'), ('data-true', True)]
    """
    # Splitting on a pattern with two groups yields a repeating sequence of
    # (text in between, parameter name, parameter value).
    pieces = WikiParser._processor_param_re.split(processor_args)
    parsed = {}

    # First the explicit assignments ...
    for name, raw in zip(pieces[1::3], pieces[2::3]):
        quoted = raw[:1] + raw[-1:] in ('""', "''")
        parsed[str(name)] = raw[1:-1] if quoted else raw

    # ... then the flags found in the text between the assignments.
    for chunk in pieces[::3]:
        for word in chunk.strip().split():
            if not _processor_pname_re.match(word):
                continue
            if word[0] != '-':
                parsed[str(word)] = True
            elif len(word) > 1:
                parsed[str(word[1:])] = False
    return parsed
Note: See TracBrowser for help on using the repository browser.