# HG changeset patch
# Parent f0574c8ae264d0c02c741286854b7dd7ab19e93b
CamelCase: use a more direct regular expression that takes the full list of upper and lower case Unicode characters into account.
The previous support for Unicode (#230) used a regexp which could lead to pathological run times in the presence of Unicode words that didn't contain any alphanumerical characters.
The run time is nearly identical (a tad slower), and WikiPageNames now also accept names like Page/Sub.
Fixes #9025.
diff --git a/trac/wiki/api.py b/trac/wiki/api.py
|
a
|
b
|
class WikiSystem(Component): |
| 230 | 230 | # here adapted to exclude terminal "." and ":" characters |
| 231 | 231 | |
| 232 | 232 | PAGE_SPLIT_RE = re.compile(r"([a-z])([A-Z])(?=[a-z])") |
| 233 | | |
| | 233 | |
| | 234 | Lu = ''.join(unichr(c) for c in range(0, 0x10000) if unichr(c).isupper()) |
| | 235 | Ll = ''.join(unichr(c) for c in range(0, 0x10000) if unichr(c).islower()) |
| | 236 | |
| 234 | 237 | def format_page_name(self, page, split=False): |
| 235 | 238 | if split or self.split_page_names: |
| 236 | 239 | return self.PAGE_SPLIT_RE.sub(r"\1 \2", page) |
| 237 | 240 | return page |
| 238 | 241 | |
| 239 | 242 | def get_wiki_syntax(self): |
| 240 | | lower = r'(?<![A-Z0-9_])' # No Upper case when looking behind |
| 241 | | upper = r'(?<![a-z0-9_])' # No Lower case when looking behind |
| 242 | 243 | wiki_page_name = ( |
| 243 | | r"\w%s(?:\w%s)+(?:\w%s(?:\w%s)*[\w/]%s)+" % # wiki words |
| 244 | | (upper, lower, upper, lower, lower) + |
| 245 | | r"(?:@\d+)?" # optional version |
| 246 | | r"(?:#%s)?" % self.XML_NAME + # optional fragment id |
| 247 | | r"(?=:(?:\Z|\s)|[^:a-zA-Z]|\s|\Z)" # what should follow it |
| 248 | | ) |
| 249 | | |
| | 244 | r"(?:[%(upper)s](?:[%(lower)s])+?/?){2,}" # wiki words |
| | 245 | r"(?:@\d+)?" # optional version |
| | 246 | r"(?:#%(xml)s)?" # optional fragment id |
| | 247 | r"(?=:(?:\Z|\s)|[^:%(upper)s%(lower)s]|\s|\Z)" |
| | 248 | # what should follow it |
| | 249 | % {'upper': self.Lu, 'lower': self.Ll, 'xml': self.XML_NAME}) |
| 250 | 250 | |
| 251 | 251 | # Regular WikiPageNames |
| 252 | 252 | def wikipagename_link(formatter, match, fullmatch): |
| 253 | | if not _check_unicode_camelcase(match): |
| 254 | | return match |
| 255 | 253 | return self._format_link(formatter, 'wiki', match, |
| 256 | 254 | self.format_page_name(match), |
| 257 | 255 | self.ignore_missing_pages, match) |
| … |
… |
class WikiSystem(Component): |
| 263 | 261 | def wikipagename_with_label_link(formatter, match, fullmatch): |
| 264 | 262 | page = fullmatch.group('wiki_page') |
| 265 | 263 | label = fullmatch.group('wiki_label') |
| 266 | | if not _check_unicode_camelcase(page): |
| 267 | | return label |
| 268 | 264 | return self._format_link(formatter, 'wiki', page, label.strip(), |
| 269 | 265 | self.ignore_missing_pages, match) |
| 270 | 266 | yield (r"!?\[(?P<wiki_page>%s)\s+(?P<wiki_label>%s|[^\]]+)\]" |
| … |
… |
class WikiSystem(Component): |
| 380 | 376 | 'Wiki Start' |
| 381 | 377 | """ |
| 382 | 378 | return self.format_page_name(resource.id) |
| 383 | | |
| 384 | | |
| 385 | | def _check_unicode_camelcase(pagename): |
| 386 | | """A camelcase word must have at least 2 humps (well...) |
| 387 | | |
| 388 | | >>> _check_unicode_camelcase(u"\xc9l\xe9phant") |
| 389 | | False |
| 390 | | >>> _check_unicode_camelcase(u"\xc9l\xe9Phant") |
| 391 | | True |
| 392 | | >>> _check_unicode_camelcase(u"\xe9l\xe9Phant") |
| 393 | | False |
| 394 | | >>> _check_unicode_camelcase(u"\xc9l\xe9PhanT") |
| 395 | | False |
| 396 | | """ |
| 397 | | if not pagename[0].isupper(): |
| 398 | | return False |
| 399 | | pagename = pagename.split('@', 1)[0].split('#', 1)[0] |
| 400 | | if not pagename[-1].islower(): |
| 401 | | return False |
| 402 | | humps = 0 |
| 403 | | for i in xrange(1, len(pagename)): |
| 404 | | if pagename[i-1].isupper(): |
| 405 | | if pagename[i].islower(): |
| 406 | | humps += 1 |
| 407 | | else: |
| 408 | | return False |
| 409 | | return humps > 1 |
| 410 | | |