ChristianBoos: mimeview_conversion.diff
| File mimeview_conversion.diff, 50.0 kB (added by cboos, 2 years ago) |
|---|
-
trac/attachment.py
432 432 fd.seek(0) 433 433 434 434 binary = is_binary(str_data) 435 mime _type = mimeview.get_mimetype(attachment.filename, str_data)435 mimetype = mimeview.get_mimetype(attachment.filename, str_data) 436 436 437 437 # Eventually send the file directly 438 438 format = req.args.get('format') … … 442 442 # contain malicious code enabling XSS attacks 443 443 req.send_header('Content-Disposition', 'attachment;' + 444 444 'filename=' + attachment.filename) 445 if not mime _type or (self.render_unsafe_content and \445 if not mimetype or (self.render_unsafe_content and \ 446 446 not binary and format == 'txt'): 447 mime_type = 'text/plain' 448 if 'charset=' not in mime_type: 449 charset = mimeview.get_charset(str_data, mime_type) 450 mime_type = mime_type + '; charset=' + charset 451 req.send_file(attachment.path, mime_type) 447 mimetype = 'text/plain' 448 full_mimetype = mimeview.get_mimetype_charset( 449 attachment.filename, str_data, mimetype) 450 req.send_file(attachment.path, full_mimetype) 452 451 453 452 # add ''Plain Text'' alternate link if needed 454 453 if self.render_unsafe_content and not binary and \ 455 not mime _type.startswith('text/plain'):454 not mimetype.startswith('text/plain'): 456 455 plaintext_href = attachment.href(req, format='txt') 457 456 add_link(req, 'alternate', plaintext_href, 'Plain Text', 458 mime _type)457 mimetype) 459 458 460 459 # add ''Original Format'' alternate link (always) 461 460 raw_href = attachment.href(req, format='raw') 462 add_link(req, 'alternate', raw_href, 'Original Format', mime _type)461 add_link(req, 'alternate', raw_href, 'Original Format', mimetype) 463 462 464 463 self.log.debug("Rendering preview of file %s with mime-type %s" 465 % (attachment.filename, mime _type))464 % (attachment.filename, mimetype)) 466 465 467 466 req.hdf['attachment'] = mimeview.preview_to_hdf( 468 req, fd, os.fstat(fd.fileno()).st_size, mime _type,467 req, fd, os.fstat(fd.fileno()).st_size, mimetype, 469 468 attachment.filename, raw_href, annotations=['lineno']) 470 469 finally: 471 470 fd.close() -
trac/mimeview/api.py
27 27 * taking advantage of existing conventions for the file name 28 28 * examining the file content and applying various heuristics 29 29 30 The module also knows how to convert the file content from one type31 to another type.30 The module also knows about conversions from one data type to another type, 31 like conversions to text/html (this is no more a special case). 32 32 33 33 In some cases, only the `url` pointing to the file's content is actually 34 34 needed, that's why we avoid to read the file's content when it's not needed. … … 49 49 50 50 51 51 __all__ = ['get_mimetype', 'is_binary', 'detect_unicode', 'Mimeview', 52 'content_to_unicode'] 52 'content_to_unicode', 53 'combine_mimetype_charset', 'split_mimetype_charset'] 53 54 54 55 55 56 # Some common MIME types and their associated keywords and/or file extensions … … 153 154 return 'application/octet-stream' 154 155 return mimetype 155 156 157 def combine_mimetype_charset(mimetype, charset): 158 """Combine the MIME type and charset information in a single string.""" 159 if mimetype and charset and not 'charset' in mimetype: 160 return '%s; charset=%s' % (mimetype, charset) 161 else: 162 return mimetype 163 164 def split_mimetype_charset(full_mimetype): 165 """Return (mimetype, charset) from the combined information""" 166 mimetype = full_mimetype 167 charset = None 168 idx = full_mimetype.find(';') 169 if idx >= 0: 170 mimetype = full_mimetype[:idx].strip() 171 idx = full_mimetype.find('charset=', idx) 172 if idx >= -1: 173 charset = full_mimetype[idx+8:].strip() 174 return mimetype, charset 175 156 176 def is_binary(data): 157 177 """Detect binary content by checking the first thousand bytes for zeroes. 158 178 … … 176 196 else: 177 197 return None 178 198 199 # Deprecated (TODO: remove in 0.11) 200 179 201 def content_to_unicode(env, content, mimetype): 180 """Retrieve an `unicode` object from a `content` to be previewed""" 181 mimeview = Mimeview(env) 182 if hasattr(content, 'read'): 183 content = content.read(mimeview.max_preview_size) 184 return mimeview.to_unicode(content, mimetype) 202 """Retrieve an `unicode` object from a `content` to be previewed. 203 ''Deprecated in 0.10.'' 204 """ 205 return Mimeview(env).fetch_content(content, mimetype) 185 206 186 207 187 208 class IHTMLPreviewRenderer(Interface): 188 209 """Extension point interface for components that add HTML renderers of 189 210 specific content types to the `Mimeview` component. 190 211 191 (Deprecated)212 Deprecated in 0.10. Implement `IContentConverter` instead. 192 213 """ 193 214 194 215 # implementing classes should set this property to True if they … … 196 217 expand_tabs = False 197 218 198 219 def get_quality_ratio(mimetype): 199 """Return the level of support this renderer provides for the `content` 200 of the specified MIME type. The return value must be a number between 201 0 and 9, where 0 means no support and 9 means "perfect" support. 202 """ 220 """Return the level of support this renderer provides""" 203 221 204 222 def render(req, mimetype, content, filename=None, url=None): 205 """Render an XHTML preview of the raw `content`. 223 """Render an XHTML preview of the raw `content`.""" 206 224 207 The `content` might be:208 * a `str` object209 * an `unicode` string210 * any object with a `read` method, returning one of the above211 225 212 It is assumed that the content will correspond to the given `mimetype`.213 214 Besides the `content` value, the same content may eventually215 be available through the `filename` or `url` parameters.216 This is useful for renderers that embed objects, using <object> or217 <img> instead of including the content inline.218 219 Can return the generated XHTML text as a single string or as an220 iterable that yields strings. In the latter case, the list will221 be considered to correspond to lines of text in the original content.222 """223 224 226 class IHTMLPreviewAnnotator(Interface): 225 227 """Extension point interface for components that can annotate an XHTML 226 228 representation of file contents with additional information.""" … … 238 240 annotation data.""" 239 241 240 242 243 class Conversion(object): 244 """A data conversion specification. 245 246 The conversion goes from an `in_type` to an `out_type`. 247 A conversion is identified by a `key`, has a `name` and proposes 248 an `extension` that can be used for storing the converted data in a file. 249 250 The `quality` ratio of the conversion is a number in the range 0 to 9, 251 where 0 means no support and 9 means "perfect" support. 252 253 Finally, `expand_tabs` indicates whether a tab expansion should precede 254 the conversion attempt. 255 256 e.g. Conversion(key='latex', name='LaTeX', extension='tex', 257 in_type='text/x-trac-wiki', out_type='text/x-tex', 258 quality=8) 259 """ 260 261 def __init__(self, key, name=None, extension='', 262 in_type=None, out_type='text/html', 263 quality=1, expand_tabs=False): 264 self.key = key 265 self.name = name or key 266 self.extension = extension 267 self.in_type = in_type 268 self.out_type = out_type 269 self.quality = quality 270 self.expand_tabs = expand_tabs 271 272 def __repr__(self): 273 return '<Conversion "%s" %s -> %s>' % \ 274 (self.key, self.in_type, self.out_type) 275 276 241 277 class IContentConverter(Interface): 242 278 """An extension point interface for generic MIME based content 243 279 conversion.""" 244 280 245 def get_supported_conversions(): 246 """Return an iterable of tuples in the form (key, name, extension, 247 in_mimetype, out_mimetype, quality) representing the MIME conversions 248 supported and 249 the quality ratio of the conversion in the range 0 to 9, where 0 means 250 no support and 9 means "perfect" support. eg. ('latex', 'LaTeX', 'tex', 251 'text/x-trac-wiki', 'text/plain', 8)""" 281 def get_supported_conversions(mimetype): 282 """Check if conversion of `mimetype` is supported by this converter. 252 283 253 def convert_content(req, mimetype, content, key): 254 """Convert the given content from mimetype to the output MIME type 255 represented by key. Returns a tuple in the form (content, 256 output_mime_type).""" 284 Return an iterable of `Conversion` objects if this is the case. 285 """ 257 286 287 def convert_content(req, conversion, content, filename, url): 288 """Convert the given `content` using the specified `conversion` object. 258 289 290 If not directly available through the `content` value, 291 the content may be available through the `filename` or `url` 292 arguments. 293 This can be useful for converters that can provide links to objects, 294 instead of having to inline the content. 295 296 Return the converted content. 297 """ 298 299 259 300 class Mimeview(Component): 260 301 """A generic class to prettify data, typically source code.""" 261 302 262 renderers = ExtensionPoint(IHTMLPreviewRenderer) 303 renderers = ExtensionPoint(IHTMLPreviewRenderer) # TODO: remove in 0.11 263 304 annotators = ExtensionPoint(IHTMLPreviewAnnotator) 264 305 converters = ExtensionPoint(IContentConverter) 265 306 … … 282 323 def __init__(self): 283 324 self._mime_map = None 284 325 285 # Public API 326 # -- MIME type conversion 327 328 def get_supported_conversions(self, mimetype, content=None, filename=None): 329 """Return a list of possible conversions for the given `content`. 286 330 287 def get_supported_conversions(self, mimetype): 288 """Return a list of target MIME types in same form as 289 `IContentConverter.get_supported_conversions()`, but with the converter 290 component appended. Output is ordered from best to worst quality.""" 331 The input `mimetype` is inferred from the `content` and/or the 332 `filename`, if not given. 333 334 Return a list of (conversion,converter), ordered from best 335 to worst quality. 336 """ 337 # Ensure we have a mimetype and only the mimetype, without the charset 338 if mimetype: 339 mimetype, charset = split_mimetype_charset(mimetype) 340 else: 341 mimetype = self.get_mimetype(filename, content) or 'text/plain' 342 343 # Build list of possible conversions, with their associated converters 291 344 converters = [] 292 345 for converter in self.converters: 293 for k, n, e, im, om, q in converter.get_supported_conversions(): 294 if im == mimetype and q > 0: 295 converters.append((k, n, e, im, om, q, converter)) 296 converters = sorted(converters, key=lambda i: i[-1], reverse=True) 297 return converters 346 print converter 347 for conversion in converter.get_supported_conversions(mimetype): 348 if conversion.quality > 0: 349 converters.append((conversion, converter)) 298 350 299 def convert_content(self, req, mimetype, content, key, filename=None, 300 url=None): 301 """Convert the given content to the target MIME type represented by 302 `key`, which can be either a MIME type or a key. Returns a tuple of 303 (content, output_mime_type, extension).""" 351 # ---- Backward compatibility support for IHTMLPreviewRenderer 352 class RendererWrapper(object): 353 def __init__(self, renderer): 354 self.renderer = renderer 355 def convert_content(self, req, conversion, content, 356 filename=None, url=None): 357 return self.renderer.render(req, conversion.in_type, 358 content, filename, url) 359 for renderer in self.renderers: 360 qr = renderer.get_quality_ratio(mimetype) 361 if qr > 0: 362 expand_tabs = getattr(renderer, 'expand_tabs', False) 363 converters.append( 364 (Conversion(key='', name='', extension=None, 365 in_type=mimetype, out_type='text/html', 366 quality=8, expand_tabs=expand_tabs), 367 RendererWrapper(renderer))) 368 # ---- (to be removed in 0.11) 369 370 return sorted(converters, key=lambda c: c[0].quality, reverse=True) 371 372 def convert_content(self, req, content, mimetype, selector, 373 filename=None, url=None): 374 """Convert the `content` to targeted MIME type specified by 'selector'. 375 376 The content has the MIME type `mimetype` and the target MIME type 377 is determined by `selector`, which can be either directly the 378 output MIME type or a key identifying the Conversion. 379 380 Returns a tuple of (content, output_mime_type, extension). 381 """ 304 382 if not content: 305 return ('', 'text/plain; charset=utf-8')383 return ('', 'text/plain; charset=utf-8', '') 306 384 307 # Ensure we have a MIME type for this content 308 full_mimetype = mimetype 309 if not full_mimetype: 310 if hasattr(content, 'read'): 311 content = content.read(self.max_preview_size) 312 full_mimetype = self.get_mimetype(filename, content) 313 if full_mimetype: 314 mimetype = full_mimetype.split(';')[0].strip() # split off charset 315 else: 316 mimetype = full_mimetype = 'text/plain' # fallback if not binary 385 # Ensure we have the mimetype and the charset information 386 print `('cc', filename, content, mimetype)` 387 full_mimetype = self.get_mimetype_charset(filename, content, mimetype) 388 mimetype, charset = split_mimetype_charset(full_mimetype) 317 389 318 # Choose best converter 319 candidates = self.get_supported_conversions(mimetype) 320 candidates = [c for c in candidates if key in (c[0], c[4])] 390 # Filter the converters of `mimetype` that are matching `selector` 391 candidates = self.get_supported_conversions(mimetype, content, filename) 392 candidates = [c for c in candidates 393 if selector in (c[0].key, c[0].out_type)] 321 394 if not candidates: 322 395 raise TracError('No available MIME conversions from %s to %s' % 323 (mimetype, key))396 (mimetype, selector)) 324 397 398 tab_expanded = False # we don't want to expand tabs more than once. 399 325 400 # First candidate which converts successfully wins. 326 for ck, name, ext, input_mimettype, output_mimetype, quality, \ 327 converter in candidates: 401 for conversion, converter in candidates: 402 if conversion.expand_tabs and not tab_expanded: 403 content = self.fetch_content(content, full_mimetype) 404 content = content.expandtabs(self.tab_width) 405 tab_expanded = True 328 406 try: 329 output = converter.convert_content(req, mimetype, content, ck) 407 output = converter.convert_content(req, conversion, content, 408 filename, url) 330 409 if not output: 331 410 continue 332 return (output[0], output[1], ext)411 return (output[0], output[1], conversion.extension) 333 412 except Exception, e: 334 413 self.log.warning('MIME conversion using %s failed (%s)' 335 414 % (converter, e), exc_info=True) 336 raise TracError('No available MIME conversions from %s to %s' %337 (mimetype, key))415 raise TracError('No MIME conversions from %s to %s succeeded' % 416 (mimetype, selector)) 338 417 418 # -- XHTML rendering and annotations (based on the conversion API) 419 339 420 def get_annotation_types(self): 340 421 """Generator that returns all available annotation types.""" 341 422 for annotator in self.annotators: … … 343 424 344 425 def render(self, req, mimetype, content, filename=None, url=None, 345 426 annotations=None): 346 """Render an XHTML preview of the given `content`. 347 348 `content` is the same as an `IHTMLPreviewRenderer.render`'s 349 `content` argument. 350 351 The specified `mimetype` will be used to select the most appropriate 352 `IHTMLPreviewRenderer` implementation available for this MIME type. 353 If not given, the MIME type will be infered from the filename or the 354 content. 355 356 Return a string containing the XHTML text. 427 """Render an XHTML preview of the given `content`, with `annotations`. 357 428 """ 358 if not content: 359 return '' 360 361 # Ensure we have a MIME type for this content 362 full_mimetype = mimetype 363 if not full_mimetype: 364 if hasattr(content, 'read'): 365 content = content.read(self.max_preview_size) 366 full_mimetype = self.get_mimetype(filename, content) 367 if full_mimetype: 368 mimetype = full_mimetype.split(';')[0].strip() # split off charset 429 result, output_type, ext = self.convert_content( 430 req, content, mimetype, 'text/html', filename, url) 431 print `result` 432 if isinstance(result, Fragment): 433 return result 434 elif isinstance(result, basestring): 435 return Markup(to_unicode(result)) 436 elif annotations: 437 return Markup(self._annotate(result, annotations)) 369 438 else: 370 mimetype = full_mimetype = 'text/plain' # fallback if not binary 439 buf = StringIO() 440 buf.write('<div class="code"><pre>') 441 for line in result: 442 buf.write(line + '\n') 443 buf.write('</pre></div>') 444 return Markup(buf.getvalue()) 371 445 372 # Determine candidate `IHTMLPreviewRenderer`s373 candidates = []374 for renderer in self.renderers:375 qr = renderer.get_quality_ratio(mimetype)376 if qr > 0:377 candidates.append((qr, renderer))378 candidates.sort(lambda x,y: cmp(y[0], x[0]))379 380 # First candidate which renders successfully wins.381 # Also, we don't want to expand tabs more than once.382 expanded_content = None383 for qr, renderer in candidates:384 try:385 self.log.debug('Trying to render HTML preview using %s'386 % renderer.__class__.__name__)387 # check if we need to perform a tab expansion388 rendered_content = content389 if getattr(renderer, 'expand_tabs', False):390 if expanded_content is None:391 content = content_to_unicode(self.env, content,392 full_mimetype)393 expanded_content = content.expandtabs(self.tab_width)394 rendered_content = expanded_content395 result = renderer.render(req, full_mimetype, rendered_content,396 filename, url)397 if not result:398 continue399 elif isinstance(result, Fragment):400 return result401 elif isinstance(result, basestring):402 return Markup(to_unicode(result))403 elif annotations:404 return Markup(self._annotate(result, annotations))405 else:406 buf = StringIO()407 buf.write('<div class="code"><pre>')408 for line in result:409 buf.write(line + '\n')410 buf.write('</pre></div>')411 return Markup(buf.getvalue())412 except Exception, e:413 self.log.warning('HTML preview using %s failed (%s)'414 % (renderer, e), exc_info=True)415 416 446 def _annotate(self, lines, annotations): 417 447 buf = StringIO() 418 448 buf.write('<table class="code"><thead><tr>') … … 447 477 buf.write('</tbody></table>') 448 478 return buf.getvalue() 449 479 450 def get_max_preview_size(self): 451 """Deprecated: use `max_preview_size` attribute directly.""" 452 return self.max_preview_size 453 480 # -- MIME type and charset detection 481 454 482 def get_charset(self, content='', mimetype=None): 455 483 """Infer the character encoding from the `content` or the `mimetype`. 456 484 … … 459 487 The charset will be determined using this order: 460 488 * from the charset information present in the `mimetype` argument 461 489 * auto-detection of the charset from the `content` 462 * the configured `default_charset` 490 * the configured `default_charset` 463 491 """ 464 492 if mimetype: 465 ctpos = mimetype.find('charset=')466 if c tpos >= 0:467 return mimetype[ctpos + 8:].strip()493 mimetype, charset = split_mimetype_charset(mimetype) 494 if charset: 495 return charset 468 496 if isinstance(content, str): 469 497 utf = detect_unicode(content) 470 498 if utf is not None: 471 499 return utf 500 # TODO: ICharsetDetector 472 501 return self.default_charset 473 502 474 503 def get_mimetype(self, filename, content=None): 475 504 """Infer the MIME type from the `filename` or the `content`. 476 505 477 `content` is either a `str` or an `unicode` object. 506 `content` is either a `str` or an `unicode` object, 507 or something that can be `read`. 478 508 479 Return the detected MIME type, augmented by the 480 charset information (i.e. "<mimetype>; charset=..."), 481 or `None` if detection failed. 509 Return the detected MIME type or `None` if detection failed. 482 510 """ 483 511 # Extend default extension to MIME type mappings with configured ones 484 512 if not self._mime_map: … … 489 517 for keyword in assocations: # Note: [0] kept on purpose 490 518 self._mime_map[keyword] = assocations[0] 491 519 492 mimetype = get_mimetype(filename, content, self._mime_map) 520 # read the content only if there's no other way to get the mimetype 521 if hasattr(content, 'read'): 522 # first try to get the mimetype from the filename only 523 mimetype = get_mimetype(filename, None, self._mime_map) 524 if mimetype: 525 return mimetype 526 content = self.fetch_content(content, mimetype) 527 return get_mimetype(filename, content, self._mime_map) 528 529 def get_mimetype_charset(self, filename, content=None, mimetype=None): 530 """Retrieve combined mimetype and charset information. 531 532 If `mimetype` is given, we check if it provides the needed information, 533 otherwise we try to detect the mimetype and/or the charset. 534 """ 535 print `('gmc', filename, content, mimetype)` 493 536 charset = None 537 if not mimetype: 538 mimetype = self.get_mimetype(filename, content) 539 print `('gmc2', mimetype)` 494 540 if mimetype: 541 if 'charset=' in mimetype: 542 return mimetype 495 543 charset = self.get_charset(content, mimetype) 496 if mimetype and charset and not 'charset' in mimetype: 497 mimetype += '; charset=' + charset 498 return mimetype 544 return combine_mimetype_charset(mimetype, charset) 499 545 500 def to_utf8(self, content, mimetype=None): 501 """Convert an encoded `content` to utf-8. 502 503 ''Deprecated in 0.10. You should use `unicode` strings only.'' 504 """ 505 return to_utf8(content, self.get_charset(content, mimetype)) 506 546 # -- Charset conversion 547 507 548 def to_unicode(self, content, mimetype=None, charset=None): 508 549 """Convert `content` (an encoded `str` object) to an `unicode` object. 509 550 … … 514 555 charset = self.get_charset(content, mimetype) 515 556 return to_unicode(content, charset) 516 557 558 def fetch_content(self, content, mimetype): 559 if hasattr(content, 'read'): 560 content = content.read(self.max_preview_size) 561 return self.to_unicode(content, mimetype) 562 563 # -- Deprecated API (TODO: remove in 0.11) 564 565 def get_max_preview_size(self): 566 """Deprecated: use `max_preview_size` attribute directly.""" 567 return self.max_preview_size 568 569 def to_utf8(self, content, mimetype=None): 570 """Convert an encoded `content` to utf-8. 571 572 ''Deprecated in 0.10. You should use `unicode` strings only.'' 573 """ 574 return to_utf8(content, self.get_charset(content, mimetype)) 575 576 # -- Utilities 577 517 578 def configured_modes_mapping(self, renderer): 518 """Return a MIME type to `(mode,quality)` mapping for given `option`""" 579 """Utility for configurable custom converters 580 581 Return a MIME type to `(mode,quality)` mapping for given `option`, 582 assuming a format of comma-separated <mimetype>:<mode>:<quality> 583 associations. 584 585 See EnscriptConverter and SilverCityConverter. 586 """ 519 587 types, option = {}, '%s_modes' % renderer 520 588 for mapping in self.config['mimeviewer'].getlist(option): 521 589 if not mapping: … … 543 611 url, annotations), 544 612 'raw_href': url} 545 613 546 def send_converted(self, req, in_type, content, selector, filename='file'): 614 def send_converted(self, req, content, mimetype, selector, 615 filename='file'): 547 616 """Helper method for converting `content` and sending it directly. 548 617 549 `selector` can be either a key or a MIME Type.""" 618 `mimetype` is the type of the content. 619 `selector` can be either a key or the expected output MIME Type. 620 """ 550 621 from trac.web import RequestDone 551 content, output_type, ext = self.convert_content( req, in_type,552 content, selector)622 content, output_type, ext = self.convert_content( 623 req, content, mimetype, selector, filename) 553 624 req.send_response(200) 554 625 req.send_header('Content-Type', output_type) 555 req.send_header('Content-Disposition', 'filename=%s.%s' % (filename,556 ext))626 req.send_header('Content-Disposition', 'filename=%s.%s' % 627 (filename, ext)) 557 628 req.end_headers() 558 629 req.write(content) 559 630 raise RequestDone 560 631 561 632 633 # utility for Mimeview._annotate 562 634 def _html_splitlines(lines): <
