Edgewall Software

source: plugins/1.0/spam-filter/tracspamfilter/filters/regex.py

Last change on this file was 16632, checked in by Ryan J Ollos, 5 years ago

1.0.12dev: Make compatible with Trac < 1.0.2

  • Property svn:eol-style set to native
File size: 5.2 KB
Line 
1# -*- coding: utf-8 -*-
2#
3# Copyright (C) 2005-2006 Edgewall Software
4# Copyright (C) 2005 Matthew Good <trac@matt-good.net>
5# Copyright (C) 2006 Christopher Lenz <cmlenz@gmx.de>
6# All rights reserved.
7#
8# This software is licensed as described in the file COPYING, which
9# you should have received as part of this distribution. The terms
10# are also available at http://trac.edgewall.com/license.html.
11#
12# This software consists of voluntary contributions made by many
13# individuals. For the exact contribution history, see the revision
14# history and logs, available at http://projects.edgewall.com/trac/.
15#
16# Author: Matthew Good <trac@matt-good.net>
17
18import re
19
20from trac.config import BoolOption, IntOption, Option
21from trac.core import Component, TracError, implements
22from trac.util.html import html as tag
23from trac.wiki.api import IWikiChangeListener, IWikiPageManipulator
24from trac.wiki.model import WikiPage
25
26from tracspamfilter.api import IFilterStrategy, N_, tag_
27
28
class RegexFilterStrategy(Component):
    """Spam filter based on regular expressions defined in BadContent page.

    Patterns are taken from the text between the first ``{{{`` and the
    following ``}}}`` of the `BadContent` wiki page (one pattern per line)
    and, when the `badcontent_file` option is set, from that local file as
    well.  `self.patterns` always holds the page patterns followed by the
    file patterns; the file patterns are kept separately so that reloading
    or deleting the wiki page never duplicates or discards them.
    """
    implements(IFilterStrategy, IWikiChangeListener, IWikiPageManipulator)

    karma_points = IntOption('spam-filter', 'regex_karma', '5',
        """By how many points a match with a pattern on the BadContent page
        impacts the overall karma of a submission.""",
        doc_domain='tracspamfilter')

    badcontent_file = Option('spam-filter', 'badcontent_file', '',
        """Local file to be loaded to get BadContent. Can be used in
        addition to BadContent wiki page.""",
        doc_domain='tracspamfilter')

    show_blacklisted = BoolOption('spam-filter', 'show_blacklisted', 'true',
        "Show the matched bad content patterns in rejection message.",
        doc_domain='tracspamfilter')

    def __init__(self):
        """Load the initial patterns from the local file and the wiki page.

        Start-up is best effort: an unreadable file or an invalid pattern
        is logged and skipped instead of preventing the component from
        being created.
        """
        self._file_patterns = []
        self.patterns = []
        if self.badcontent_file != '':
            self._file_patterns = self._read_badcontent_file()
        page = WikiPage(self.env, 'BadContent')
        if page.exists:
            self._load_patterns(page, strict=False)
        else:
            self.patterns = list(self._file_patterns)

    # IFilterStrategy implementation

    def is_external(self):
        """Return False: this strategy only uses locally stored patterns."""
        return False

    def test(self, req, author, content, ip):
        """Search the submission for every known pattern.

        The author name is prepended to the content unless it is `None` or
        ``'anonymous'``.  Each matching pattern subtracts
        ``abs(karma_points)`` from the result.

        Returns a ``(points, message, argument)`` tuple when the resulting
        points are non-zero, otherwise `None`.
        """
        gotcha = []
        points = 0
        if author is not None and author != 'anonymous':
            testcontent = author + '\n' + content
        else:
            testcontent = content
        for pattern in self.patterns:
            if pattern.search(testcontent):
                gotcha.append("'%s'" % pattern.pattern)
                self.log.debug('Pattern %s found in submission',
                               pattern.pattern)
                points -= abs(self.karma_points)
        if points == 0:
            return None
        if self.show_blacklisted:
            matches = ", ".join(gotcha)
            return points, N_("Content contained these blacklisted "
                              "patterns: %s"), matches
        return points, N_("Content contained %s blacklisted "
                          "patterns"), str(len(gotcha))

    def train(self, req, author, content, ip, spam=True):
        """No training is performed by this strategy; always returns 0."""
        return 0

    # IWikiPageManipulator implementation

    def prepare_wiki_page(self, req, page, fields):
        """Nothing to prepare."""
        pass

    def validate_wiki_page(self, req, page):
        """Reject a `BadContent` page containing an invalid pattern.

        Only checks that the patterns compile.  The active pattern list is
        left untouched here; it is replaced by the change listener methods
        once the page has actually been saved.

        Returns a list with one ``(None, error)`` entry on failure, and an
        empty list otherwise.
        """
        if page.name == 'BadContent':
            try:
                self._parse_page_patterns(page, strict=True)
            except TracError as e:
                return [(None, e)]
        return []

    # IWikiChangeListener implementation

    def wiki_page_changed(self, page, *args):
        """Reload the page patterns whenever `BadContent` is modified."""
        if page.name == 'BadContent':
            self._load_patterns(page)

    wiki_page_added = wiki_page_changed

    wiki_page_version_deleted = wiki_page_changed

    def wiki_page_deleted(self, page):
        """Drop the page patterns when `BadContent` is deleted.

        Patterns loaded from the local file stay active.
        """
        if page.name == 'BadContent':
            self.patterns = list(self._file_patterns)

    # Internal methods

    def _load_patterns(self, page, strict=True):
        """Replace the active page patterns with those found on `page`.

        The previous page patterns are discarded rather than extended, so
        repeated reloads do not accumulate duplicates.  With `strict` set,
        a `TracError` is raised for the first invalid pattern and the
        active patterns stay unchanged; otherwise invalid patterns are
        skipped.
        """
        page_patterns = self._parse_page_patterns(page, strict)
        if page_patterns is None:
            self.log.warning("BadContent page does not contain any patterns")
            page_patterns = []
        else:
            self.log.debug("Loaded %s patterns from BadContent",
                           len(page_patterns))
        self.patterns = page_patterns + self._file_patterns

    def _parse_page_patterns(self, page, strict):
        """Compile the patterns listed on `page` without activating them.

        Returns the list of compiled patterns, or `None` when the page
        text has no ``{{{`` / ``}}}`` pair.
        """
        text = page.text
        if '{{{' not in text or '}}}' not in text:
            return None
        block = text.split('{{{', 1)[1].split('}}}', 1)[0]
        return self._compile_patterns(block.splitlines(), strict)

    def _compile_patterns(self, lines, strict):
        """Compile every non-blank line of `lines` as a regular expression.

        Lines are stripped of surrounding whitespace first.  In strict
        mode the first invalid pattern raises `TracError`; in lenient mode
        it is logged as a warning and skipped.
        """
        compiled = []
        for line in lines:
            p = line.strip()
            if not p:
                continue
            try:
                compiled.append(re.compile(p))
            except re.error as e:
                if strict:
                    self.log.debug("Error in pattern %s: %s", p, e)
                    raise TracError(tag_("Error in pattern %(pattern)s: "
                                         "%(error)s.", pattern=tag.tt(p),
                                         error=tag.i(e)))
                self.log.warning("Skipping invalid pattern %s: %s", p, e)
        return compiled

    def _read_badcontent_file(self):
        """Return the compiled patterns from the `badcontent_file` file.

        `open()` signals failure by raising, never by returning `None`, so
        the error is caught here and reported as a warning; an empty list
        is returned in that case.
        """
        try:
            with open(self.badcontent_file, 'r') as f:
                lines = f.read().splitlines()
        except (IOError, OSError) as e:
            self.log.warning("BadContent file cannot be opened: %s", e)
            return []
        compiled = self._compile_patterns(lines, strict=False)
        self.log.debug("Loaded %s patterns from BadContent file",
                       len(compiled))
        return compiled
Note: See TracBrowser for help on using the repository browser.