Edgewall Software

Ticket #9025: t9025-unicode-CamelCase-r9250.2.patch

File t9025-unicode-CamelCase-r9250.2.patch, 3.9 KB (added by cboos, 2 years ago)

A slightly faster version, albeit with a twist to the WikiPageNames rules (Page/Sub is now a valid wiki name)

  • trac/wiki/api.py

    # HG changeset patch
    # Parent f0574c8ae264d0c02c741286854b7dd7ab19e93b
    CamelCase: use a more direct regular expression that takes into account the full list of upper- and lower-case Unicode characters.
    
    The previous support for Unicode (#230) used a regexp which could lead to pathological run times in the presence of Unicode words that didn't contain any alphanumeric characters.
    
    The runtime is nearly identical (a tad slower), and WikiPageNames now also accept names like Page/Sub.
    
    Fixes #9025.
    
    diff --git a/trac/wiki/api.py b/trac/wiki/api.py
    a b class WikiSystem(Component): 
    230230    # here adapted to exclude terminal "." and ":" characters 
    231231 
    232232    PAGE_SPLIT_RE = re.compile(r"([a-z])([A-Z])(?=[a-z])") 
    233      
     233 
     234    Lu = ''.join(unichr(c) for c in range(0, 0x10000) if unichr(c).isupper()) 
     235    Ll = ''.join(unichr(c) for c in range(0, 0x10000) if unichr(c).islower()) 
     236 
    234237    def format_page_name(self, page, split=False): 
    235238        if split or self.split_page_names: 
    236239            return self.PAGE_SPLIT_RE.sub(r"\1 \2", page) 
    237240        return page 
    238241 
    239242    def get_wiki_syntax(self): 
    240         lower = r'(?<![A-Z0-9_])' # No Upper case when looking behind 
    241         upper = r'(?<![a-z0-9_])' # No Lower case when looking behind 
    242243        wiki_page_name = ( 
    243             r"\w%s(?:\w%s)+(?:\w%s(?:\w%s)*[\w/]%s)+" % # wiki words 
    244             (upper, lower, upper, lower, lower) + 
    245             r"(?:@\d+)?" # optional version 
    246             r"(?:#%s)?" % self.XML_NAME + # optional fragment id 
    247             r"(?=:(?:\Z|\s)|[^:a-zA-Z]|\s|\Z)" # what should follow it 
    248             ) 
    249  
     244            r"(?:[%(upper)s](?:[%(lower)s])+?/?){2,}" # wiki words 
     245            r"(?:@\d+)?"                             # optional version 
     246            r"(?:#%(xml)s)?"                         # optional fragment id 
     247            r"(?=:(?:\Z|\s)|[^:%(upper)s%(lower)s]|\s|\Z)" 
     248            # what should follow it 
     249            % {'upper': self.Lu, 'lower': self.Ll, 'xml': self.XML_NAME}) 
    250250         
    251251        # Regular WikiPageNames 
    252252        def wikipagename_link(formatter, match, fullmatch): 
    253             if not _check_unicode_camelcase(match): 
    254                 return match 
    255253            return self._format_link(formatter, 'wiki', match, 
    256254                                     self.format_page_name(match), 
    257255                                     self.ignore_missing_pages, match) 
    class WikiSystem(Component): 
    263261        def wikipagename_with_label_link(formatter, match, fullmatch): 
    264262            page = fullmatch.group('wiki_page') 
    265263            label = fullmatch.group('wiki_label') 
    266             if not _check_unicode_camelcase(page): 
    267                 return label 
    268264            return self._format_link(formatter, 'wiki', page, label.strip(), 
    269265                                     self.ignore_missing_pages, match) 
    270266        yield (r"!?\[(?P<wiki_page>%s)\s+(?P<wiki_label>%s|[^\]]+)\]" 
    class WikiSystem(Component): 
    380376        'Wiki Start' 
    381377        """ 
    382378        return self.format_page_name(resource.id) 
    383  
    384  
    385 def _check_unicode_camelcase(pagename): 
    386     """A camelcase word must have at least 2 humps (well...) 
    387  
    388     >>> _check_unicode_camelcase(u"\xc9l\xe9phant") 
    389     False 
    390     >>> _check_unicode_camelcase(u"\xc9l\xe9Phant") 
    391     True 
    392     >>> _check_unicode_camelcase(u"\xe9l\xe9Phant") 
    393     False 
    394     >>> _check_unicode_camelcase(u"\xc9l\xe9PhanT") 
    395     False 
    396     """ 
    397     if not pagename[0].isupper(): 
    398         return False 
    399     pagename = pagename.split('@', 1)[0].split('#', 1)[0] 
    400     if not pagename[-1].islower(): 
    401         return False 
    402     humps = 0 
    403     for i in xrange(1, len(pagename)): 
    404         if pagename[i-1].isupper(): 
    405             if pagename[i].islower(): 
    406                 humps += 1 
    407             else: 
    408                 return False 
    409     return humps > 1 
    410