| 1 | # -*- coding: utf-8 -*-
|
|---|
| 2 | #
|
|---|
| 3 | # Copyright (C) 2012 Dirk Stöcker <trac@dstoecker.de>
|
|---|
| 4 | # All rights reserved.
|
|---|
| 5 | #
|
|---|
| 6 | # This software is licensed as described in the file COPYING, which
|
|---|
| 7 | # you should have received as part of this distribution. The terms
|
|---|
| 8 | # are also available at http://trac.edgewall.com/license.html.
|
|---|
| 9 | #
|
|---|
| 10 | # This software consists of voluntary contributions made by many
|
|---|
| 11 | # individuals. For the exact contribution history, see the revision
|
|---|
| 12 | # history and logs, available at http://projects.edgewall.com/trac/.
|
|---|
| 13 |
|
|---|
| 14 | import string
|
|---|
| 15 | import urllib2
|
|---|
| 16 | from email.Utils import parseaddr
|
|---|
| 17 | from pkg_resources import get_distribution
|
|---|
| 18 | from urllib import quote
|
|---|
| 19 | from xml.etree import ElementTree
|
|---|
| 20 |
|
|---|
| 21 | from trac import __version__ as TRAC_VERSION
|
|---|
| 22 | from trac.config import IntOption, Option
|
|---|
| 23 | from trac.core import Component, implements
|
|---|
| 24 |
|
|---|
| 25 | from tracspamfilter.api import IFilterStrategy, N_
|
|---|
| 26 |
|
|---|
| 27 |
|
|---|
class FSpamListFilterStrategy(Component):
    """Spam filter using the FSpamList (http://www.fspamlist.com/).
    """
    # Looks up the submitter's IP address and, when available, the author
    # name and e-mail address through the FSpamList HTTP API.  Every entry
    # the service reports as a spammer subtracts `karma_points` from the
    # submission's karma.
    implements(IFilterStrategy)

    karma_points = IntOption('spam-filter', 'fspamlist_karma', '3',
        """By how many points a FSpamList reject impacts the overall karma of
        a submission.""", doc_domain='tracspamfilter')

    api_key = Option('spam-filter', 'fspamlist_api_key', '',
        """API key required to use FSpamList.""", doc_domain='tracspamfilter')

    # Sent as the User-Agent header of every API request.
    user_agent = 'Trac/%s | SpamFilter/%s' % (
        TRAC_VERSION, get_distribution('TracSpamFilter').version
    )

    # IFilterStrategy implementation

    def is_external(self):
        """Return `True` unconditionally.

        NOTE(review): presumably marks this strategy as one that contacts
        an external service -- confirm against `IFilterStrategy`.
        """
        return True

    def test(self, req, author, content, ip):
        """Check a submission against FSpamList.

        :param req: the current request; only forwarded to `_send`
        :param author: author string, optionally in "Name <email>" form
        :param content: submitted content (not used by this strategy)
        :param ip: IP address of the submitter
        :return: `None` when the strategy is not configured, the request
                 fails, the response cannot be parsed or nothing was
                 flagged; otherwise a tuple of (negative karma points,
                 message template, "; "-joined reasons)
        """
        if not self._check_preconditions(False):
            return
        try:
            resp = self._send(req, author, ip)
        except urllib2.URLError as e:
            self.log.warning("FSpamList request failed (%s)", e)
            return
        try:
            tree = ElementTree.fromstring(resp)
        except ElementTree.ParseError as e:
            self.log.warning("Error parsing response from FSpamList: %s\n"
                             "Response:\n%s", e, resp)
            return

        reason = []
        for el in tree:
            if el.findtext('isspammer', 'false') != 'true':
                continue
            r = '%s [%s' % (el.findtext('spammer', '-'),
                            el.findtext('threat', '-'))
            # Keep only the part of the notes preceding the "Time taken"
            # marker.
            n = el.findtext('notes', '-').split('Time taken')[0].rstrip(' ')
            if n:
                r += ", " + n
            r += "]"
            reason.append(r)
        if reason:
            # Each flagged entry counts separately against the submission.
            return (-abs(self.karma_points) * len(reason),
                    N_("FSpamList says this is spam (%s)"),
                    "; ".join(reason))

    def train(self, req, author, content, ip, spam=True):
        """Do nothing and return 0; no training request is sent."""
        return 0

    # Internal methods

    def _check_preconditions(self, train):
        """Return whether the strategy is configured well enough to run.

        The strategy is skipped when `karma_points` is 0 or no API key is
        set.  The `train` argument is accepted but not used.
        """
        if self.karma_points == 0:
            return False

        if not self.api_key:
            return False

        return True

    def _send(self, req, author, ip):
        """Query the FSpamList API and return the raw response body.

        :param req: the current request (not used here)
        :param author: author string, optionally in "Name <email>" form;
                       `None` is treated as an empty author
        :param ip: IP address of the submitter
        :return: the response body as read from the HTTP response
        :raises urllib2.URLError: propagated from `urllib2.urlopen`
        """
        # Split up author into name and email, if possible
        author = (author or u'').encode('utf-8')
        author_name, author_email = parseaddr(author)
        if not author_name and not author_email:
            author_name = author
        elif not author_name and author_email.find('@') < 1:
            # Without a plausible "@" the parsed address is not an e-mail
            # address; treat the whole string as the name instead.
            author_name = author
            author_email = None
        if author_name == 'anonymous':
            author_name = None

        # The API takes a comma separated list of items to look up.
        request = quote(ip)
        if author_name:
            request += "," + quote(author_name)
        if author_email:
            request += "," + quote(author_email)

        # Quote the key as well so reserved characters cannot corrupt the
        # query string.
        url = 'http://www.fspamlist.com/api.php?spammer=' + request + \
              '&key=' + quote(self.api_key.encode('utf-8'))
        urlreq = urllib2.Request(url, None, {'User-Agent': self.user_agent})

        resp = urllib2.urlopen(urlreq)
        try:
            return resp.read()
        finally:
            # Always release the connection, even if reading fails.
            resp.close()