| 1 | # -*- coding: utf-8 -*-
|
|---|
| 2 | #
|
|---|
| 3 | # Copyright (C) 2005-2006 Edgewall Software
|
|---|
| 4 | # Copyright (C) 2005 Matthew Good <trac@matt-good.net>
|
|---|
| 5 | # Copyright (C) 2006 Christopher Lenz <cmlenz@gmx.de>
|
|---|
| 6 | # All rights reserved.
|
|---|
| 7 | #
|
|---|
| 8 | # This software is licensed as described in the file COPYING, which
|
|---|
| 9 | # you should have received as part of this distribution. The terms
|
|---|
| 10 | # are also available at http://trac.edgewall.com/license.html.
|
|---|
| 11 | #
|
|---|
| 12 | # This software consists of voluntary contributions made by many
|
|---|
| 13 | # individuals. For the exact contribution history, see the revision
|
|---|
| 14 | # history and logs, available at http://projects.edgewall.com/trac/.
|
|---|
| 15 | #
|
|---|
| 16 | # Author: Matthew Good <trac@matt-good.net>
|
|---|
| 17 |
|
|---|
| 18 | import re
|
|---|
| 19 |
|
|---|
| 20 | from trac.config import BoolOption, IntOption, Option
|
|---|
| 21 | from trac.core import Component, TracError, implements
|
|---|
| 22 | from trac.util.html import html as tag
|
|---|
| 23 | from trac.wiki.api import IWikiChangeListener, IWikiPageManipulator
|
|---|
| 24 | from trac.wiki.model import WikiPage
|
|---|
| 25 |
|
|---|
| 26 | from tracspamfilter.api import IFilterStrategy, N_, tag_
|
|---|
| 27 |
|
|---|
| 28 |
|
|---|
class RegexFilterStrategy(Component):
    """Spam filter based on regular expressions defined in BadContent page.

    Patterns can additionally be read from a local file, see the
    badcontent_file option.
    """
    implements(IFilterStrategy, IWikiChangeListener, IWikiPageManipulator)

    karma_points = IntOption('spam-filter', 'regex_karma', '5',
        """By how many points a match with a pattern on the BadContent page
        impacts the overall karma of a submission.""",
        doc_domain='tracspamfilter')

    badcontent_file = Option('spam-filter', 'badcontent_file', '',
        """Local file to be loaded to get BadContent. Can be used in
        addition to BadContent wiki page.""",
        doc_domain='tracspamfilter')

    show_blacklisted = BoolOption('spam-filter', 'show_blacklisted', 'true',
        "Show the matched bad content patterns in rejection message.",
        doc_domain='tracspamfilter')

    def __init__(self):
        """Load the initial patterns from the BadContent page and file."""
        # Page and file patterns are kept in separate lists so that
        # reloading or deleting the page neither duplicates page patterns
        # nor discards the file patterns. `self.patterns` is the combined
        # list that `test()` iterates over.
        self.patterns = []
        self._page_patterns = []
        self._file_patterns = self._load_file_patterns()
        page = WikiPage(self.env, 'BadContent')
        if page.exists:
            # Lenient at start-up: one broken line on the stored page must
            # not disable the remaining patterns or abort construction.
            self._load_patterns(page, strict=False)
        self._rebuild_patterns()

    # IFilterStrategy implementation

    def is_external(self):
        """Return False: this strategy does not use an external service."""
        return False

    def test(self, req, author, content, ip):
        """Check a submission against all loaded patterns.

        The author name is prepended to the content (on its own line)
        unless it is None or 'anonymous', so patterns can match it too.

        Returns None when nothing matched (or the configured karma is 0),
        otherwise a `(points, message, argument)` tuple where `points` is
        negative: `abs(karma_points)` is subtracted once per matching
        pattern.
        """
        if author is not None and author != 'anonymous':
            testcontent = author + '\n' + content
        else:
            testcontent = content

        gotcha = []
        for pattern in self.patterns:
            if pattern.search(testcontent):
                gotcha.append("'%s'" % pattern.pattern)
                self.log.debug('Pattern %s found in submission',
                               pattern.pattern)
        points = -abs(self.karma_points) * len(gotcha)

        if points == 0:
            return None
        if self.show_blacklisted:
            return points, N_("Content contained these blacklisted "
                              "patterns: %s"), ", ".join(gotcha)
        return points, N_("Content contained %s blacklisted "
                          "patterns"), str(len(gotcha))

    def train(self, req, author, content, ip, spam=True):
        """Training is a no-op for this strategy; always returns 0."""
        return 0

    # IWikiPageManipulator implementation

    def prepare_wiki_page(self, req, page, fields):
        """Nothing to prepare."""
        pass

    def validate_wiki_page(self, req, page):
        """Reject a BadContent page that contains an invalid pattern.

        Returns a list with one `(None, error)` entry when a pattern does
        not compile, otherwise an empty list. The active pattern set is
        deliberately left untouched: the submitted text has not been saved
        yet, and the change listener methods reload it once it is.
        """
        if page.name == 'BadContent':
            lines = self._extract_pattern_lines(page.text)
            if lines is not None:
                try:
                    self._compile_patterns(lines, strict=True)
                except TracError as e:
                    return [(None, e)]
        return []

    # IWikiChangeListener implementation

    def wiki_page_changed(self, page, *args):
        """Reload the page patterns when the BadContent page changes."""
        if page.name == 'BadContent':
            self._load_patterns(page)

    wiki_page_added = wiki_page_changed

    wiki_page_version_deleted = wiki_page_changed

    def wiki_page_deleted(self, page):
        """Drop the page patterns when the BadContent page is deleted."""
        if page.name == 'BadContent':
            # Only the page patterns go away; file patterns stay active.
            self._page_patterns = []
            self._rebuild_patterns()

    # Internal methods

    def _rebuild_patterns(self):
        """Recompute `self.patterns` from the page and file patterns."""
        # A new list is assigned instead of mutating the old one in place.
        self.patterns = self._page_patterns + self._file_patterns

    @staticmethod
    def _extract_pattern_lines(text):
        """Return the stripped, non-blank lines of the pattern block.

        The block is the text between the first '{{{' and the following
        '}}}'. Returns None when `text` lacks either marker.
        """
        if '{{{' not in text or '}}}' not in text:
            return None
        block = text.split('{{{', 1)[1].split('}}}', 1)[0]
        return [line.strip() for line in block.splitlines() if line.strip()]

    def _compile_patterns(self, lines, strict):
        """Compile each non-blank entry of `lines` to a regex object.

        With `strict` true, the first invalid pattern raises `TracError`;
        otherwise invalid patterns are logged and skipped.
        """
        compiled = []
        for line in lines:
            expr = line.strip()
            if not expr:
                continue
            try:
                compiled.append(re.compile(expr))
            except re.error as e:
                if not strict:
                    self.log.warning("Skipping invalid pattern %s: %s",
                                     expr, e)
                    continue
                self.log.debug("Error in pattern %s: %s", expr, e)
                raise TracError(tag_("Error in pattern %(pattern)s: "
                                     "%(error)s.", pattern=tag.tt(expr),
                                     error=tag.i(e)))
        return compiled

    def _load_file_patterns(self):
        """Return the patterns compiled from the `badcontent_file` option.

        Returns an empty list when the option is unset or the file cannot
        be read; invalid patterns in the file are logged and skipped.
        """
        path = self.badcontent_file
        if not path:
            return []
        try:
            with open(path, 'r') as source:
                lines = source.read().splitlines()
        except (IOError, OSError) as e:
            # open() raises rather than returning None, so the failure has
            # to be caught here to keep the component usable.
            self.log.warning("BadContent file cannot be opened: %s", e)
            return []
        compiled = self._compile_patterns(lines, strict=False)
        self.log.debug("Loaded %s patterns from BadContent file",
                       len(compiled))
        return compiled

    def _load_patterns(self, page, strict=True):
        """Replace the page patterns with those found on `page`.

        :param page: wiki page whose `text` holds the pattern block
        :param strict: when true (the default), an invalid pattern raises
                       `TracError` and the current patterns are left
                       unchanged; when false, invalid patterns are skipped
        """
        lines = self._extract_pattern_lines(page.text)
        if lines is None:
            self.log.warning("BadContent page does not contain any patterns")
            self._page_patterns = []
        else:
            # Compile into a fresh list first so a failure cannot leave a
            # half-updated pattern set behind.
            self._page_patterns = self._compile_patterns(lines, strict)
            self.log.debug("Loaded %s patterns from BadContent",
                           len(self._page_patterns))
        self._rebuild_patterns()