Edgewall Software

source: plugins/1.0/spam-filter/tracspamfilter/filters/fspamlist.py

Last change on this file was 14841, checked in by Ryan J Ollos, 7 years ago

1.0.9dev: Trap ParseError when calling ElementTree.fromstring

This should provide more information about potential issues.
The following was seen in the logs:

2016-06-07 02:36:04,008 Trac[filtersystem] ERROR: Filter strategy <tracspamfilter.filters.fspamlist.FSpamListFilterStrategy object at 0x7f17765c7450> raised exception: syntax error: line 1, column 0
Traceback (most recent call last):
  File "/var/www/bugs.jqueryui.com/private/pve/local/lib/python2.7/site-packages/tracspamfilter/filtersystem.py", line 236, in test
    retval = strategy.test(req, author, content, ip)
  File "/var/www/bugs.jqueryui.com/private/pve/local/lib/python2.7/site-packages/tracspamfilter/filters/fspamlist.py", line 54, in test
    tree = ElementTree.fromstring(resp)
  File "/usr/lib/python2.7/xml/etree/ElementTree.py", line 1300, in XML
    parser.feed(text)
  File "/usr/lib/python2.7/xml/etree/ElementTree.py", line 1642, in feed
    self._raiseerror(v)
  File "/usr/lib/python2.7/xml/etree/ElementTree.py", line 1506, in _raiseerror
    raise err
ParseError: syntax error: line 1, column 0
File size: 3.8 KB
Line 
1# -*- coding: utf-8 -*-
2#
3# Copyright (C) 2012 Dirk Stöcker <trac@dstoecker.de>
4# All rights reserved.
5#
6# This software is licensed as described in the file COPYING, which
7# you should have received as part of this distribution. The terms
8# are also available at http://trac.edgewall.com/license.html.
9#
10# This software consists of voluntary contributions made by many
11# individuals. For the exact contribution history, see the revision
12# history and logs, available at http://projects.edgewall.com/trac/.
13
14import string
15import urllib2
16from email.Utils import parseaddr
17from pkg_resources import get_distribution
18from urllib import quote
19from xml.etree import ElementTree
20
21from trac import __version__ as TRAC_VERSION
22from trac.config import IntOption, Option
23from trac.core import Component, implements
24
25from tracspamfilter.api import IFilterStrategy, N_
26
27
class FSpamListFilterStrategy(Component):
    """Spam filter using the FSpamList (http://www.fspamlist.com/).

    The submitter's IP address and, when they can be extracted from the
    author field, the author name and e-mail address are sent to the
    FSpamList web API.  Every entry the service reports as a spammer
    lowers the karma of the submission by `fspamlist_karma` points.
    """
    implements(IFilterStrategy)

    karma_points = IntOption('spam-filter', 'fspamlist_karma', '3',
        """By how many points a FSpamList reject impacts the overall karma of
        a submission.""", doc_domain='tracspamfilter')

    api_key = Option('spam-filter', 'fspamlist_api_key', '',
        """API key required to use FSpamList.""", doc_domain='tracspamfilter')

    # Sent as the User-Agent header of every request to the service.
    user_agent = 'Trac/%s | SpamFilter/%s' % (
        TRAC_VERSION, get_distribution('TracSpamFilter').version
    )

    # IFilterStrategy implementation

    def is_external(self):
        """Return `True`: this strategy queries a remote web service."""
        return True

    def test(self, req, author, content, ip):
        """Ask FSpamList whether the submitter is a known spammer.

        :param req: the current request (passed through to `_send`)
        :param author: the author string of the submission
        :param content: the submitted content (not used by this filter)
        :param ip: the IP address of the submitter
        :return: `None` if the filter is not configured, the request
                 failed, the response could not be parsed or nothing was
                 flagged; otherwise a tuple of (negative karma, message
                 template, "; "-joined reasons), with the karma multiplied
                 by the number of flagged entries.
        """
        if not self._check_preconditions(False):
            return
        try:
            resp = self._send(req, author, ip)
        except urllib2.URLError as e:
            self.log.warn("FSpamList request failed (%s)", e)
            return
        try:
            tree = ElementTree.fromstring(resp)
        except ElementTree.ParseError as e:
            # Log the raw response as well, so that malformed replies
            # from the service can be diagnosed from the log.
            self.log.warn("Error parsing response from FSpamList: %s\n"
                          "Response:\n%s", e, resp)
            return

        reason = []
        for el in tree:
            if el.findtext('isspammer', 'false') != 'true':
                continue
            entry = '%s [%s' % (el.findtext('spammer', '-'),
                                el.findtext('threat', '-'))
            # Keep only the part of the notes before 'Time taken'.
            notes = el.findtext('notes', '-') \
                      .split('Time taken')[0].rstrip(' ')
            if notes:
                entry += ", " + notes
            entry += "]"
            reason.append(entry)
        if reason:
            return (-abs(self.karma_points) * len(reason),
                    N_("FSpamList says this is spam (%s)"),
                    "; ".join(reason))

    def train(self, req, author, content, ip, spam=True):
        """Training is a no-op for this strategy; always return 0."""
        return 0

    # Internal methods

    def _check_preconditions(self, train):
        """Return whether the filter is usable with the current settings.

        The filter is skipped when `fspamlist_karma` is 0 or when no API
        key is configured.  The `train` argument is not evaluated.
        """
        if self.karma_points == 0:
            return False

        if not self.api_key:
            return False

        return True

    def _send(self, req, author, ip):
        """Query the FSpamList API and return the raw response body.

        :param req: the current request (not used)
        :param author: the author string; split into name and e-mail
                       address where possible
        :param ip: the IP address of the submitter
        :return: the response body as returned by the service
        :raise urllib2.URLError: if the request fails
        """
        # Split up author into name and email, if possible
        author = author.encode('utf-8')
        author_name, author_email = parseaddr(author)
        if not author_name and not author_email:
            author_name = author
        elif not author_name and author_email.find('@') < 1:
            # What parseaddr took for an address has no local part
            # followed by '@', so treat the whole string as a name.
            author_name = author
            author_email = None
        if author_name == 'anonymous':
            author_name = None

        # The IP and the optional name and e-mail are sent as one
        # comma-separated 'spammer' parameter.
        request = quote(ip)
        if author_name:
            request += "," + quote(author_name)
        if author_email:
            request += "," + quote(author_email)

        url = 'http://www.fspamlist.com/api.php?spammer=' + request + \
              '&key=' + self.api_key
        urlreq = urllib2.Request(url, None, {'User-Agent': self.user_agent})

        resp = urllib2.urlopen(urlreq)
        try:
            return resp.read()
        finally:
            # Always release the connection, even if reading fails.
            resp.close()
Note: See TracBrowser for help on using the repository browser.