| 1 | # -*- coding: utf-8 -*-
|
|---|
| 2 | #
|
|---|
| 3 | # Copyright (C) 2003-2023 Edgewall Software
|
|---|
| 4 | # Copyright (C) 2003-2004 Jonas Borgström <jonas@edgewall.com>
|
|---|
| 5 | # Copyright (C) 2006 Matthew Good <trac@matt-good.net>
|
|---|
| 6 | # Copyright (C) 2005-2006 Christian Boos <cboos@edgewall.org>
|
|---|
| 7 | # All rights reserved.
|
|---|
| 8 | #
|
|---|
| 9 | # This software is licensed as described in the file COPYING, which
|
|---|
| 10 | # you should have received as part of this distribution. The terms
|
|---|
| 11 | # are also available at https://trac.edgewall.org/wiki/TracLicense.
|
|---|
| 12 | #
|
|---|
| 13 | # This software consists of voluntary contributions made by many
|
|---|
| 14 | # individuals. For the exact contribution history, see the revision
|
|---|
| 15 | # history and logs, available at https://trac.edgewall.org/log/.
|
|---|
| 16 | #
|
|---|
| 17 | # Author: Jonas Borgström <jonas@edgewall.com>
|
|---|
| 18 | # Matthew Good <trac@matt-good.net>
|
|---|
| 19 | # Christian Boos <cboos@edgewall.org>
|
|---|
| 20 |
|
|---|
| 21 | import base64
|
|---|
| 22 | import configparser
|
|---|
| 23 | import locale
|
|---|
| 24 | import os
|
|---|
| 25 | import pkg_resources
|
|---|
| 26 | import re
|
|---|
| 27 | import sys
|
|---|
| 28 | import textwrap
|
|---|
| 29 | from urllib.parse import quote, quote_plus, unquote
|
|---|
| 30 | from unicodedata import east_asian_width
|
|---|
| 31 |
|
|---|
| 32 | import jinja2
|
|---|
| 33 |
|
|---|
# Canonical network end-of-line sequence
CRLF = '\r\n'

class Empty(str):
    """A special tag object evaluating to the empty string"""
    __slots__ = []

# Singleton marker used e.g. by `unicode_urlencode` to emit a key with
# no '=value' part
empty = Empty()

del Empty # shouldn't be used outside of Trac core


# -- Jinja2

# Feature-detect the installed Jinja2: the `with_` extension was merged
# into core in Jinja2 3.x, so it is only loaded for older versions.
_jinja2_ver = pkg_resources.parse_version(jinja2.__version__)
_jinja2_exts = ['jinja2.ext.do', 'jinja2.ext.i18n']
if _jinja2_ver < pkg_resources.parse_version('3'):
    _jinja2_exts.append('jinja2.ext.with_')
|
|---|
| 51 |
|
|---|
def jinja2env(**kwargs):
    """Creates a Jinja2 ``Environment`` configured with Trac conventions.

    All default parameters can optionally be overridden. The ``loader``
    parameter is not set by default, so unless it is set by the
    caller, only inline templates can be created from the environment.

    :rtype: `jinja.Environment`

    """
    exts = ('.html', '.rss', '.xml')
    def filterout_none(v):
        # Render missing values as '' instead of the string 'None'
        return '' if v is None else v
    def autoescape_extensions(template):
        # Auto-escape only templates whose name has a markup extension
        return template and template.endswith(exts)
    defaults = dict(
        variable_start_string='${',
        variable_end_string='}',
        line_statement_prefix='#',
        line_comment_prefix='##',
        trim_blocks=True,
        lstrip_blocks=True,
        extensions=list(_jinja2_exts),
        finalize=filterout_none,
        autoescape=autoescape_extensions,
    )
    defaults.update(kwargs)  # caller-supplied settings win
    jenv = jinja2.Environment(**defaults)
    jenv.globals.update(
        len=len,  # make len() callable from inside templates
    )
    return jenv
|
|---|
| 84 |
|
|---|
def jinja2template(template, text=False, **kwargs):
    """Creates a Jinja2 ``Template`` from inlined source.

    :param template: the template content
    :param text: if set to `False`, the result of the variable
                 expansion will be XML/HTML escaped
    :param kwargs: additional arguments to pass to `jinja2env`. See
                   `jinja2.Environment` for supported arguments.
    """
    # A fresh single-use environment is built for each inline template
    return jinja2env(autoescape=not text, **kwargs).from_string(template)
|
|---|
| 95 |
|
|---|
| 96 |
|
|---|
| 97 | # -- Unicode
|
|---|
| 98 |
|
|---|
def to_unicode(text, charset=None):
    """Convert input to a `str` object.

    For a `bytes` object, we'll first try to decode the bytes using the given
    `charset` encoding (or UTF-8 if none is specified), then we fall back to
    the latin1 encoding which might be correct or not, but at least preserves
    the original byte sequence by mapping each byte to the corresponding
    unicode code point in the range U+0000 to U+00FF.

    For anything else, a simple `str()` conversion is attempted,
    with special care taken with `Exception` objects.

    :param text: the value to convert (`bytes`, `Exception` or anything
                 accepted by `str()`)
    :param charset: optional encoding name used for `bytes` input
    """
    if isinstance(text, bytes):
        try:
            return str(text, charset or 'utf-8')
        except UnicodeDecodeError:
            # latin1 cannot fail: every byte maps to U+0000..U+00FF
            return str(text, 'latin1')
    if isinstance(text, Exception):
        # two possibilities for storing unicode strings in exception data:
        try:
            # custom __str__ method on the exception (e.g. PermissionError)
            result = str(text)
        except UnicodeError:
            # unicode arguments given to the exception (e.g. parse_date)
            return ' '.join(to_unicode(arg) for arg in text.args)
        if os.name == 'nt':
            # remove duplicated backslashes from filename in the message
            if isinstance(text, EnvironmentError) and text.filename:
                source = repr(text.filename)
            elif isinstance(text, configparser.ParsingError) and text.source:
                source = repr(text.source)
            else:
                source = None
            if source:
                result = result.replace(source, source.replace(r'\\', '\\'))
        return result
    return str(text)
|
|---|
| 136 |
|
|---|
| 137 |
|
|---|
def exception_to_unicode(e, traceback=False):
    """Convert an `Exception` to a `str` object.

    In addition to `to_unicode`, this representation of the exception
    also contains the class name and optionally the traceback.

    :param e: the exception to render
    :param traceback: when `True`, prepend the most recent traceback
    """
    message = '%s: %s' % (e.__class__.__name__, to_unicode(e))
    if not traceback:
        return message
    from trac.util import get_last_traceback
    # Drop the final "SomeError: ..." lines; they duplicate `message`
    traceback_only = get_last_traceback().split('\n')[:-2]
    return '\n%s\n%s' % (to_unicode('\n'.join(traceback_only)), message)
|
|---|
| 150 |
|
|---|
| 151 |
|
|---|
def path_to_unicode(path):
    """Convert a filesystem path to `str`, using the filesystem encoding."""
    if not isinstance(path, bytes):
        return str(path)
    fs_encoding = sys.getfilesystemencoding()
    try:
        return str(path, fs_encoding)
    except UnicodeDecodeError:
        # latin1 always succeeds and preserves the raw byte values
        return str(path, 'latin1')
|
|---|
| 160 |
|
|---|
| 161 |
|
|---|
# Leading/trailing runs of Unicode whitespace or ZWSP (U+200B)
_ws_leading_re = re.compile('\\A[\\s\u200b]+', re.UNICODE)
_ws_trailing_re = re.compile('[\\s\u200b]+\\Z', re.UNICODE)

def stripws(text, leading=True, trailing=True):
    """Strips unicode white-spaces and ZWSPs from ``text``.

    :param leading: strips leading spaces from ``text`` unless ``leading``
                    is `False`.
    :param trailing: strips trailing spaces from ``text`` unless
                     ``trailing`` is `False`.
    """
    result = text
    if leading:
        result = _ws_leading_re.sub('', result)
    if trailing:
        result = _ws_trailing_re.sub('', result)
    return result
|
|---|
| 178 |
|
|---|
| 179 |
|
|---|
def strip_line_ws(text, leading=True, trailing=True):
    """Strips unicode white-spaces and ZWSPs from each line of ``text``.

    :param leading: strips leading spaces from ``text`` unless ``leading`` is
                    `False`.
    :param trailing: strips trailing spaces from ``text`` unless ``trailing``
                     is `False`.
    """
    # Split with a capturing group so the line separators end up at the
    # odd indices of `lines` and can be re-joined unchanged below; the
    # text parts sit at the even indices (hence the [::2] slices).
    lines = re.compile(r'(\n|\r\n|\r)').split(text)
    if leading:
        lines[::2] = (_ws_leading_re.sub('', line) for line in lines[::2])
    if trailing:
        lines[::2] = (_ws_trailing_re.sub('', line) for line in lines[::2])
    return ''.join(lines)
|
|---|
| 194 |
|
|---|
| 195 |
|
|---|
# Escape table for embedding text in Javascript string literals.
_js_quote = {'\\': '\\\\', '"': '\\"', '\b': '\\b', '\f': '\\f',
             '\n': '\\n', '\r': '\\r', '\t': '\\t', "'": "\\'"}
# Control characters plus markup-significant ones get \uXXXX escapes.
for i in list(range(0x20)) + [ord(c) for c in '&<>\u2028\u2029']:
    _js_quote.setdefault(chr(i), '\\u%04x' % i)
_js_quote_re = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t\'&<>' + '\u2028\u2029]')
_js_string_re = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t&<>' + '\u2028\u2029]')


def javascript_quote(text):
    """Quote strings for inclusion in single or double quote delimited
    Javascript strings
    """
    if not text:
        return ''
    return _js_quote_re.sub(lambda m: _js_quote[m.group(0)], text)
|
|---|
| 213 |
|
|---|
| 214 |
|
|---|
def to_js_string(text):
    """Embed the given string in a double quote delimited Javascript string
    (conform to the JSON spec)
    """
    if not text:
        return '""'
    # Unlike `javascript_quote`, single quotes are left untouched here
    escaped = _js_string_re.sub(lambda m: _js_quote[m.group(0)], text)
    return '"%s"' % escaped
|
|---|
| 224 |
|
|---|
| 225 |
|
|---|
def unicode_quote(value, safe='/'):
    """A unicode aware version of `urllib.quote`

    :param value: anything that converts to a `bytes`. If `str`
                  input is given, it will be UTF-8 encoded.
    :param safe: as in `quote`, the characters that would otherwise be
                 quoted but shouldn't here (defaults to '/')
    """
    if not isinstance(value, bytes):
        value = str(value)
    return quote(value, safe)
|
|---|
| 235 |
|
|---|
| 236 |
|
|---|
def unicode_quote_plus(value, safe=''):
    """A unicode aware version of `urllib.quote_plus`.

    :param value: anything that converts to a `bytes`. If `str`
                  input is given, it will be UTF-8 encoded.
    :param safe: as in `quote_plus`, the characters that would
                 otherwise be quoted but shouldn't here (defaults to
                 '')
    """
    # Note: the docstring previously claimed the default was '/', but the
    # actual default has always been '' (so '/' *is* quoted here).
    return quote_plus(value if isinstance(value, bytes) else str(value), safe)
|
|---|
| 247 |
|
|---|
| 248 |
|
|---|
def unicode_unquote(value):
    """A unicode aware version of `urllib.unquote`.

    :param value: UTF-8 encoded `str` value (for example, as obtained by
                  `unicode_quote`).
    :rtype: `str`
    """
    # latin1 maps each byte to the same code point, so nothing is lost
    # before the %XX sequences get decoded as UTF-8 below
    text = value.decode('latin1') if isinstance(value, bytes) else value
    return unquote(text, encoding='utf-8', errors='strict')
|
|---|
| 259 |
|
|---|
| 260 |
|
|---|
def unicode_urlencode(params, safe=''):
    """A unicode aware version of `urllib.urlencode`.

    Values set to `empty` are converted to the key alone, without the
    equal sign.
    """
    if isinstance(params, dict):
        # Deterministic output: iterate the mapping in key order
        params = sorted(params.items(), key=lambda item: item[0])
    pairs = []
    for name, value in params:
        encoded = unicode_quote_plus(name, safe)
        if value is not empty:
            encoded += '=' + unicode_quote_plus(value, safe)
        pairs.append(encoded)
    return '&'.join(pairs)
|
|---|
| 277 |
|
|---|
| 278 |
|
|---|
# Every printable ASCII character except the space: keeps query strings
# readable while still quoting spaces, non-ASCII and control characters.
_qs_quote_safe = ''.join(chr(c) for c in range(0x21, 0x7f))

def quote_query_string(text):
    """Quote strings for query string
    """
    return unicode_quote_plus(text, _qs_quote_safe)
|
|---|
| 285 |
|
|---|
| 286 |
|
|---|
def to_utf8(text, charset='latin1'):
    """Convert input to a UTF-8 `bytes` object.

    If the input is not an `str` object, we assume the encoding is
    already UTF-8, ISO Latin-1, or as specified by the optional
    *charset* parameter.
    """
    if not isinstance(text, bytes):
        return to_unicode(text).encode('utf-8')
    try:
        decoded = str(text, 'utf-8')
    except UnicodeError:
        try:
            # Use the user supplied charset if possible
            decoded = str(text, charset)
        except UnicodeError:
            # latin1 never fails
            decoded = str(text, 'latin1')
    else:
        # Already valid UTF-8: hand the bytes back untouched
        return text
    return decoded.encode('utf-8')
|
|---|
| 310 |
|
|---|
| 311 |
|
|---|
class unicode_passwd(str):
    """Conceal the actual content of the string when `repr` is called."""

    def __repr__(self):
        # Never leak the secret into logs or tracebacks
        return '*******'
|
|---|
| 316 |
|
|---|
| 317 |
|
|---|
def stream_encoding(stream):
    """Return the appropriate encoding for the given stream."""
    encoding = getattr(stream, 'encoding', None)
    # Windows reports 'cp0' when the console has no encoding at all
    if encoding in (None, 'cp0'):
        return 'utf-8'
    return encoding
|
|---|
| 323 |
|
|---|
| 324 |
|
|---|
def console_print(out, *args, **kwargs):
    """Output the given arguments to the console, encoding the output
    as appropriate.

    :param out: a file-like object with a ``write`` method
    :param kwargs: ``newline`` controls whether a newline will be appended
                   (defaults to `True`)
    """
    rendered = ' '.join(to_unicode(arg) for arg in args)
    out.write(rendered)
    if kwargs.get('newline', True):
        out.write('\n')
|
|---|
| 335 |
|
|---|
| 336 |
|
|---|
def printout(*args, **kwargs):
    """Do a `console_print` on `sys.stdout`."""
    console_print(sys.stdout, *args, **kwargs)


def printerr(*args, **kwargs):
    """Do a `console_print` on `sys.stderr`."""
    console_print(sys.stderr, *args, **kwargs)


def printfout(message, *args, **kwargs):
    """Format `message`, do a `console.print` on `sys.stdout` and flush
    the buffer.
    """
    if args:
        # %-style formatting, only applied when positional args are given
        message %= args
    printout(message, **kwargs)
    sys.stdout.flush()


def printferr(message, *args, **kwargs):
    """Format `message`, do a `console.print` on `sys.stderr` and flush
    the buffer.
    """
    if args:
        message %= args
    printerr(message, **kwargs)
    sys.stderr.flush()


def raw_input(prompt):
    """Input one line from the console and converts it to unicode as
    appropriate.
    """
    # NOTE: deliberately shadows the (Python 2 era) builtin name
    printout(prompt, newline=False)
    return to_unicode(input(), sys.stdin.encoding)
|
|---|
| 373 |
|
|---|
| 374 |
|
|---|
# Looked up once at import time: `locale.getpreferredencoding()` itself
# is not thread-safe, so the cached value is served afterwards.
_preferredencoding = locale.getpreferredencoding()

def getpreferredencoding():
    """Return the encoding, which is retrieved on ahead, according to user
    preference.

    We should use this instead of `locale.getpreferredencoding()` which
    is not thread-safe."""
    return _preferredencoding
|
|---|
| 384 |
|
|---|
| 385 |
|
|---|
| 386 | # -- Plain text formatting
|
|---|
| 387 |
|
|---|
def text_width(text, ambiwidth=1):
    """Determine the column width of `text` in Unicode characters.

    The characters in the East Asian Fullwidth (F) or East Asian Wide (W)
    have a column width of 2. The other characters in the East Asian
    Halfwidth (H) or East Asian Narrow (Na) have a column width of 1.

    That `ambiwidth` parameter is used for the column width of the East
    Asian Ambiguous (A). If `1`, the same width as characters in US-ASCII.
    This is expected by most users. If `2`, twice the width of US-ASCII
    characters. This is expected by CJK users.

    cf. http://www.unicode.org/reports/tr11/.
    """
    twice = 'FWA' if ambiwidth == 2 else 'FW'
    # Loop variable renamed from `chr`, which shadowed the builtin; also
    # feed `sum` a generator instead of materializing a list.
    return sum(2 if east_asian_width(ch) in twice else 1
               for ch in to_unicode(text))
|
|---|
| 405 |
|
|---|
| 406 |
|
|---|
def _get_default_ambiwidth():
    """Return width of East Asian Ambiguous based on locale environment
    variables or Windows codepage.
    """

    if os.name == 'nt':
        # On Windows, infer the width from the console output codepage
        import ctypes
        codepage = ctypes.windll.kernel32.GetConsoleOutputCP()
        if codepage in (932,   # Japanese (Shift-JIS)
                        936,   # Chinese Simplified (GB2312)
                        949,   # Korean (Unified Hangul Code)
                        950):  # Chinese Traditional (Big5)
            return 2
    else:
        # Inspect the locale variables in their standard precedence order
        for name in ('LANGUAGE', 'LC_ALL', 'LC_MESSAGES', 'LANG'):
            value = os.environ.get(name) or ''
            if value:
                if name == 'LANGUAGE' and ':' in value:
                    # LANGUAGE may hold a colon-separated priority list;
                    # only its first entry is considered
                    value = value.split(':')[0]
                return 2 if value.lower().startswith(('zh', 'ja', 'ko')) else 1

    return 1


# Computed once at import time; used as the default by `print_table`
_default_ambiwidth = _get_default_ambiwidth()
|
|---|
| 432 |
|
|---|
| 433 |
|
|---|
def print_table(data, headers=None, sep=' ', out=None, ambiwidth=None):
    """Print data according to a tabular layout.

    :param data: a sequence of rows; assume all rows are of equal length.
    :param headers: an optional row containing column headers; must be of
                    the same length as each row in `data`.
    :param sep: column separator
    :param out: output file descriptor (`None` means use `sys.stdout`)
    :param ambiwidth: column width of the East Asian Ambiguous (A). If None,
                      detect ambiwidth with the locale settings. If others,
                      pass to the `ambiwidth` parameter of `text_width`.
    """
    if out is None:
        out = sys.stdout
    if ambiwidth is None:
        ambiwidth = _default_ambiwidth
    data = list(data)
    if headers:
        # Headers are simply treated as the first data row below
        data.insert(0, headers)
    elif not data:
        return

    # Convert to a str object with `to_unicode`. If None, convert to a
    # empty string.
    def to_text(val):
        if val is None:
            return ''
        return to_unicode(val)

    def tw(text):
        # Display-column width, honouring East Asian wide characters
        return text_width(text, ambiwidth=ambiwidth)

    def to_lines(data):
        # Expand cells containing newlines into multiple physical rows,
        # padding the other cells of the row with empty strings
        lines = []
        for row in data:
            row = [to_text(cell) for cell in row]
            if any('\n' in cell for cell in row):
                row = [cell.splitlines() for cell in row]
                max_lines = max(len(cell) for cell in row)
                for cell in row:
                    if len(cell) < max_lines:
                        cell += [''] * (max_lines - len(cell))
                lines.extend([cell[idx] for cell in row]
                             for idx in range(max_lines))
            else:
                lines.append(row)
        return lines

    data = to_lines(data)

    num_cols = len(data[0])
    col_width = [max(tw(row[idx]) for row in data)
                 for idx in range(num_cols)]

    out.write('\n')
    for ridx, row in enumerate(data):
        for cidx, cell in enumerate(row):
            if cidx + 1 == num_cols:
                line = cell  # No separator after last column
            else:
                if headers and ridx == 0:
                    sp = ' ' * tw(sep)  # No separator in header
                else:
                    sp = sep
                # The %-*s pad width is adjusted so wide characters
                # (2 columns but 1 character) line up correctly
                line = '%-*s%s' % (col_width[cidx] - tw(cell) + len(cell),
                                   cell, sp)
            out.write(line)

        out.write('\n')
        if ridx == 0 and headers:
            # Horizontal rule between the header row and the data
            out.write('-' * (tw(sep) * cidx + sum(col_width)))
            out.write('\n')
    out.write('\n')
|
|---|
| 507 |
|
|---|
| 508 |
|
|---|
def shorten_line(text, maxlen=75):
    """Truncates `text` to length less than or equal to `maxlen` characters.

    This tries to be (a bit) clever and attempts to find a proper word
    boundary for doing so.
    """
    if len(text or '') <= maxlen:
        return text
    suffix = ' ...'
    limit = maxlen - len(suffix)
    # Prefer breaking at the last space or newline before the limit
    cut = max(text.rfind(' ', 0, limit), text.rfind('\n', 0, limit))
    if cut < 0:
        cut = limit  # no word boundary available: hard cut
    return text[:cut] + suffix
|
|---|
| 523 |
|
|---|
| 524 |
|
|---|
class UnicodeTextWrapper(textwrap.TextWrapper):
    """A `textwrap.TextWrapper` that measures text in display columns
    (via `text_width`) and treats runs of CJK/Hangul characters as
    breakable at any position.
    """

    # Unicode ranges whose characters may be broken anywhere, without a
    # surrounding word boundary.
    # NOTE(review): the third element of the Plane 2/3 entries (a UTF-16
    # surrogate-pair pattern) is never read by `_init_patterns` -- confirm
    # whether it is still needed.
    breakable_char_ranges = [
        (0x1100, 0x11FF), # Hangul Jamo
        (0x2E80, 0x2EFF), # CJK Radicals Supplement
        (0x3000, 0x303F), # CJK Symbols and Punctuation
        (0x3040, 0x309F), # Hiragana
        (0x30A0, 0x30FF), # Katakana
        (0x3130, 0x318F), # Hangul Compatibility Jamo
        (0x3190, 0x319F), # Kanbun
        (0x31C0, 0x31EF), # CJK Strokes
        (0x3200, 0x32FF), # Enclosed CJK Letters and Months
        (0x3300, 0x33FF), # CJK Compatibility
        (0x3400, 0x4DBF), # CJK Unified Ideographs Extension A
        (0x4E00, 0x9FFF), # CJK Unified Ideographs
        (0xA960, 0xA97F), # Hangul Jamo Extended-A
        (0xAC00, 0xD7AF), # Hangul Syllables
        (0xD7B0, 0xD7FF), # Hangul Jamo Extended-B
        (0xF900, 0xFAFF), # CJK Compatibility Ideographs
        (0xFE30, 0xFE4F), # CJK Compatibility Forms
        (0xFF00, 0xFFEF), # Halfwidth and Fullwidth Forms
        (0x20000, 0x2FFFF, '[\uD840-\uD87F][\uDC00-\uDFFF]'), # Plane 2
        (0x30000, 0x3FFFF, '[\uD880-\uD8BF][\uDC00-\uDFFF]'), # Plane 3
    ]

    # Compiled lazily, once per class, by `_init_patterns`
    split_re = None
    breakable_re = None

    @classmethod
    def _init_patterns(cls):
        # Build one character class covering every breakable range
        char_ranges = []
        for val in cls.breakable_char_ranges:
            # NOTE(review): the names look swapped -- `high` receives the
            # start of the range and `low` the end
            high = chr(val[0])
            low = chr(val[1])
            char_ranges.append('%s-%s' % (high, low))
        char_ranges = ''.join(char_ranges)
        pattern = '[%s]+' % char_ranges

        cls.split_re = re.compile(
            r'(\s+|' +                                 # any whitespace
            pattern + '|' +                            # breakable text
            r'[^\s\w]*\w+[^0-9\W]-(?=\w+[^0-9\W])|' +  # hyphenated words
            r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))',    # em-dash
            re.UNICODE)
        cls.breakable_re = re.compile(r'\A' + pattern, re.UNICODE)

    def __init__(self, cols, replace_whitespace=0, break_long_words=0,
                 initial_indent='', subsequent_indent='', ambiwidth=1):
        # `replace_whitespace`/`break_long_words` are accepted for
        # signature compatibility but always passed down as 0
        textwrap.TextWrapper.__init__(
            self, cols, replace_whitespace=0, break_long_words=0,
            initial_indent=initial_indent,
            subsequent_indent=subsequent_indent)
        self.ambiwidth = ambiwidth
        if self.split_re is None:
            self._init_patterns()

    def _split(self, text):
        # Chunks are the non-empty pieces left by the splitting pattern
        chunks = self.split_re.split(to_unicode(text))
        return list(filter(None, chunks))

    def _text_width(self, text):
        # Display-column width (East Asian wide characters count as 2)
        return text_width(text, ambiwidth=self.ambiwidth)

    def _wrap_chunks(self, chunks):
        # Re-implementation of TextWrapper._wrap_chunks that measures in
        # display columns and can split a breakable (CJK) chunk mid-run
        lines = []
        chunks.reverse()
        text_width = self._text_width

        while chunks:
            cur_line = []
            cur_width = 0

            if lines:
                indent = self.subsequent_indent
            else:
                indent = self.initial_indent
            width = self.width - text_width(indent)

            # Drop leading whitespace on continuation lines
            if chunks[-1].strip() == '' and lines:
                del chunks[-1]

            while chunks:
                chunk = chunks[-1]
                w = text_width(chunk)
                if cur_width + w <= width:
                    cur_line.append(chunks.pop())
                    cur_width += w
                elif self.breakable_re.match(chunk):
                    # CJK text: break the chunk at the last column that
                    # still fits on the current line
                    left_space = width - cur_width
                    for i in range(len(chunk)):
                        w = text_width(chunk[i])
                        if left_space < w:
                            break
                        left_space -= w
                    if i > 0:
                        cur_line.append(chunk[:i])
                        chunk = chunk[i:]
                        chunks[-1] = chunk
                        w = text_width(chunk)
                    break
                else:
                    break

            if chunks and w > width:
                self._handle_long_word(chunks, cur_line, cur_width, width)

            # Drop a trailing whitespace chunk
            if cur_line and cur_line[-1].strip() == '':
                del cur_line[-1]

            if cur_line:
                lines.append(indent + ''.join(cur_line))

        return lines
|
|---|
| 637 |
|
|---|
| 638 |
|
|---|
def wrap(t, cols=75, initial_indent='', subsequent_indent='',
         linesep=os.linesep, ambiwidth=1):
    """Wraps the single paragraph in `t`, which contains unicode characters.
    The every line is at most `cols` characters long.

    That `ambiwidth` parameter is used for the column width of the East
    Asian Ambiguous (A). If `1`, the same width as characters in US-ASCII.
    This is expected by most users. If `2`, twice the width of US-ASCII
    characters. This is expected by CJK users.
    """
    # Normalize all line endings to '\n' before wrapping
    t = t.strip().replace('\r\n', '\n').replace('\r', '\n')
    wrapper = UnicodeTextWrapper(cols, replace_whitespace=0,
                                 break_long_words=0,
                                 initial_indent=initial_indent,
                                 subsequent_indent=subsequent_indent,
                                 ambiwidth=ambiwidth)
    wrappedLines = []
    for line in t.split('\n'):
        # An empty input line must survive as an empty output line
        wrappedLines += wrapper.wrap(line.rstrip()) or ['']
    return linesep.join(wrappedLines)
|
|---|
| 659 |
|
|---|
| 660 |
|
|---|
# Marker substituted for the domain part of an e-mail address
_obfuscation_char = '@\u2026'

def obfuscate_email_address(address):
    """Replace anything looking like an e-mail address (``'@something'``)
    with a trailing ellipsis (``'@…'``)
    """
    if not address:
        return address
    at = address.find('@')
    if at == -1:
        return address
    # Keep a closing '>' so "Name <user@host>" stays well-formed
    closing = '>' if address[-1] == '>' else ''
    return address[:at] + _obfuscation_char + closing
|
|---|
| 673 |
|
|---|
| 674 |
|
|---|
def is_obfuscated(word):
    """Returns `True` if the `word` looks like an obfuscated e-mail
    address.

    :since: 1.2
    """
    # An address rewritten by `obfuscate_email_address` always contains
    # the '@…' marker
    return _obfuscation_char in word
|
|---|
| 682 |
|
|---|
| 683 |
|
|---|
def breakable_path(path):
    """Make a path breakable after path separators, and conversely, avoid
    breaking at spaces.
    """
    if not path:
        return path
    prefix = ''
    if path[0] == '/':
        # Avoid breaking right after a leading /
        prefix, path = '/', path[1:]
    # ZWSP after each separator allows a break; NBSP forbids one at spaces
    breakable = path.replace('/', '/\u200b').replace('\\', '\\\u200b')
    return prefix + breakable.replace(' ', '\u00a0')
|
|---|
| 696 |
|
|---|
| 697 |
|
|---|
def normalize_whitespace(text, to_space='\u00a0', remove='\u200b'):
    """Normalize whitespace in a string, by replacing special spaces by normal
    spaces and removing zero-width spaces."""
    if not text:
        return text
    # Two passes keep the original semantics: first map every `to_space`
    # character to a plain space, then drop every `remove` character.
    text = text.translate({ord(ch): ' ' for ch in to_space})
    return text.translate({ord(ch): None for ch in remove})
|
|---|
| 708 |
|
|---|
| 709 |
|
|---|
def unquote_label(txt):
    """Remove (one level of) enclosing single or double quotes.

    .. versionadded :: 1.0
    """
    if txt and txt[0] in "'\"" and txt[0] == txt[-1]:
        return txt[1:-1]
    return txt
|
|---|
| 716 |
|
|---|
| 717 |
|
|---|
def cleandoc(message):
    """Removes uniform indentation and leading/trailing whitespace."""
    # Aliased so this function can shadow the stdlib name safely
    from inspect import cleandoc as _cleandoc
    return _cleandoc(message).strip()
|
|---|
| 722 |
|
|---|
| 723 |
|
|---|
| 724 | # -- Conversion
|
|---|
| 725 |
|
|---|
def pretty_size(size, format='%.1f'):
    """Pretty print content size information with appropriate unit.

    :param size: number of bytes
    :param format: can be used to adjust the precision shown
    """
    if size is None:
        return ''

    kib = 1024
    if size < kib:
        # Localized singular/plural for the byte-sized case
        from trac.util.translation import ngettext
        return ngettext("%(num)d byte", "%(num)d bytes", num=size)

    units = ['KB', 'MB', 'GB', 'TB']
    unit_idx = 0
    while size >= kib and unit_idx < len(units):
        unit_idx += 1
        size /= 1024.
    return (format + ' %s') % (size, units[unit_idx - 1])
|
|---|
| 747 |
|
|---|
| 748 |
|
|---|
def expandtabs(s, tabstop=8, ignoring=None):
    """Expand tab characters `'\\\\t'` into spaces.

    :param tabstop: number of space characters per tab
                    (defaults to the canonical 8)

    :param ignoring: if not `None`, the expansion will be "smart" and
                     go from one tabstop to the next. In addition,
                     this parameter lists characters which can be
                     ignored when computing the indent.
    """
    if '\t' not in s:
        return s
    if ignoring is None:
        # No characters to ignore: the builtin does exactly the job
        return s.expandtabs(tabstop)

    expanded = []
    for line in s.split('\n'):
        if '\t' not in line:
            expanded.append(line)
            continue
        col = 0  # current column, not counting ignored characters
        parts = []
        for ch in line:
            if ch == '\t':
                pad = tabstop - col % tabstop
                parts.append(' ' * pad)
                col += pad
            else:
                parts.append(ch)
                # ignored characters occupy no column
                if not ignoring or ch not in ignoring:
                    col += 1
        expanded.append(''.join(parts))
    return '\n'.join(expanded)
|
|---|
| 784 |
|
|---|
| 785 |
|
|---|
def fix_eol(text, eol):
    """Fix end-of-lines in a text.

    :param text: `str` or `bytes` input
    :param eol: the desired line separator, as `str`
    """
    lines = text.splitlines()
    if isinstance(text, bytes):
        # match the bytes flavour of the input and of the separator
        lines.append(b'')
        eol = eol.encode('utf-8')
    else:
        lines.append('')
    return eol.join(lines)
|---|
| 796 |
|
|---|
def unicode_to_base64(text, strip_newlines=True):
    """Safe conversion of ``text`` to base64 representation using
    utf-8 bytes.

    Strips newlines from output unless ``strip_newlines`` is `False`.
    """
    raw = to_unicode(text).encode('utf-8')
    # b64encode emits one line; encodebytes wraps with trailing newlines
    encoded = base64.b64encode(raw) if strip_newlines \
              else base64.encodebytes(raw)
    return str(encoded, 'ascii')
|
|---|
| 810 |
|
|---|
def unicode_from_base64(text):
    """Safe conversion of ``text`` to str based on utf-8 bytes."""
    return base64.b64decode(text).decode('utf-8')
|
|---|
| 814 |
|
|---|
| 815 |
|
|---|
def levenshtein_distance(lhs, rhs):
    """Return the Levenshtein distance between two strings.

    A substitution is weighted 2, i.e. the same as a deletion plus an
    insertion.
    """
    # Keep the shorter string as `lhs` so the row buffer is minimal
    if len(lhs) > len(rhs):
        lhs, rhs = rhs, lhs
    if not lhs:
        return len(rhs)

    previous = list(range(len(rhs) + 1))
    for lidx, lch in enumerate(lhs):
        current = [lidx + 1]
        for ridx, rch in enumerate(rhs):
            substitution = previous[ridx] + (0 if lch == rch else 2)
            deletion = previous[ridx + 1] + 1
            insertion = current[ridx] + 1
            current.append(min(deletion, insertion, substitution))
        previous = current
    return previous[-1]
|
|---|
| 833 |
|
|---|
| 834 |
|
|---|
# Matches $UPPER_CASE variable references
sub_vars_re = re.compile("[$]([A-Z_][A-Z0-9_]*)")

def sub_vars(text, args):
    """Substitute $XYZ-style variables in a string with provided values.

    :param text: string containing variables to substitute.
    :param args: dictionary with keys matching the variables to be substituted.
                 The keys should not be prefixed with the $ character."""
    def replace(match):
        name = match.group(1)
        # Unknown variables are left untouched (with their leading '$')
        return args.get(name, '$' + name)
    return sub_vars_re.sub(replace, text)
|---|