| 1 | # -*- coding: utf-8 -*-
|
|---|
| 2 | #
|
|---|
| 3 | # Copyright (C) 2011 Dirk Stöcker <trac@dstoecker.de>
|
|---|
| 4 | # All rights reserved.
|
|---|
| 5 | #
|
|---|
| 6 | # This software is licensed as described in the file COPYING, which
|
|---|
| 7 | # you should have received as part of this distribution. The terms
|
|---|
| 8 | # are also available at http://trac.edgewall.com/license.html.
|
|---|
| 9 | #
|
|---|
| 10 | # This software consists of voluntary contributions made by many
|
|---|
| 11 | # individuals. For the exact contribution history, see the revision
|
|---|
| 12 | # history and logs, available at http://projects.edgewall.com/trac/.
|
|---|
| 13 |
|
|---|
| 14 | import urllib2
|
|---|
| 15 | from email.Utils import parseaddr
|
|---|
| 16 | from pkg_resources import get_distribution
|
|---|
| 17 | from urllib import urlencode
|
|---|
| 18 | from xml.etree import ElementTree
|
|---|
| 19 |
|
|---|
| 20 | from trac import __version__ as TRAC_VERSION
|
|---|
| 21 | from trac.config import IntOption, Option
|
|---|
| 22 | from trac.core import Component, implements
|
|---|
| 23 |
|
|---|
| 24 | from tracspamfilter.api import IFilterStrategy, N_
|
|---|
| 25 |
|
|---|
| 26 |
|
|---|
class StopForumSpamFilterStrategy(Component):
    """Spam filter using the StopForumSpam service (http://stopforumspam.com/).

    Submissions are rated by looking up the author name, e-mail address and
    IP address at the service.  Content marked as spam can be reported back
    when an API key is configured.
    """
    implements(IFilterStrategy)

    karma_points = IntOption('spam-filter', 'stopforumspam_karma', '4',
        """By how many points a StopForumSpam reject impacts the overall
        karma of a submission.""", doc_domain='tracspamfilter')

    api_key = Option('spam-filter', 'stopforumspam_api_key', '',
        "API key used to report SPAM.", doc_domain='tracspamfilter')

    # Sent as the HTTP User-Agent header and quoted in the report evidence.
    user_agent = 'Trac/%s | SpamFilter/%s' % (
        TRAC_VERSION, get_distribution('TracSpamFilter').version
    )

    # IFilterStrategy implementation

    def is_external(self):
        """Return `True`, as this strategy issues HTTP requests."""
        return True

    def test(self, req, author, content, ip):
        """Rate a submission by querying StopForumSpam.

        :param req: the current request (not used by the lookup itself)
        :param author: author string, optionally of the form
                       ``Name <email>``
        :param content: the submitted text (not used by the lookup)
        :param ip: the IP address of the submitter
        :return: `None` if the strategy is disabled, the request failed or
                 nothing was found; otherwise a tuple
                 ``(points, message, reason)`` where `points` is zero or
                 negative.
        """
        if not self._check_preconditions(False):
            return
        try:
            resp = self._send(req, author, content, ip, False)
        except IOError as e:
            self.log.warn("StopForumSpam request failed (%s)", e)
            return

        tree = ElementTree.fromstring(resp)
        karma = 0
        reason = []
        for entry in ('username', 'ip', 'email'):
            appears = tree.find('./%s/appears' % entry)
            if appears is None or appears.text != "1":
                continue
            node = tree.find('./%s/confidence' % entry)
            confidence = node.text if node is not None else None
            try:
                score = float(confidence)
            except (TypeError, ValueError):
                # Listed, but without a usable confidence value: skip the
                # entry instead of failing the whole submission.
                self.log.warn("StopForumSpam returned no usable confidence "
                              "for %s (%r)", entry, confidence)
                continue
            # Each hit contributes its confidence percentage of the
            # configured points.
            karma += abs(self.karma_points) * score / 100.0
            reason.append("%s [%s]" % (entry, confidence))

        if karma:
            # Round to the nearest integer and report as negative karma.
            return (-int(karma + 0.5),
                    N_("StopForumSpam says this is spam (%s)"),
                    ",".join(reason))

    def train(self, req, author, content, ip, spam=True):
        """Report a spam submission to StopForumSpam.

        :param req: the current request
        :param author: author string, optionally of the form
                       ``Name <email>``
        :param content: the submitted text, sent along as evidence
        :param ip: the IP address of the submitter
        :param spam: whether the content is spam; only spam is reported
        :return: ``0`` if nothing was done (not spam, or the author name or
                 e-mail address needed for a report is missing), ``-2`` if
                 the strategy is disabled or has no API key, ``-1`` if the
                 request failed, ``1`` if the report was sent.
        """
        if not spam:
            return 0
        elif not self._check_preconditions(True):
            return -2

        try:
            sent = self._send(req, author, content, ip, True)
        except IOError as e:
            # IOError also covers urllib2.URLError and socket errors raised
            # while reading the response.
            self.log.warn("StopForumSpam request failed (%s)", e)
            return -1
        if sent is None:
            # _send() declined to submit an incomplete report, so do not
            # claim a successful training.
            return 0
        return 1

    # Internal methods

    def _check_preconditions(self, train):
        """Return whether the service may be contacted.

        :param train: `True` when a spam report is about to be sent, which
                      additionally requires a configured API key
        """
        if self.karma_points == 0:
            return False
        if train and not self.api_key:
            return False
        return True

    def _send(self, req, author, content, ip, train):
        """Send a lookup or a spam report and return the response body.

        :param req: the current request (unused)
        :param author: author string, optionally of the form
                       ``Name <email>``
        :param content: the submitted text; only used as report evidence
        :param ip: the IP address of the submitter
        :param train: `True` to report spam, `False` to look up the data
        :return: the raw response body, or `None` if `train` is set but the
                 author name or e-mail address is missing and therefore
                 nothing was sent
        :raise IOError: if the HTTP request fails (`urllib2.URLError` is a
                        subclass of `IOError`)
        """
        # Split up author into name and email, if possible
        author = author.encode('utf-8')
        author_name, author_email = parseaddr(author)
        if not author_name and not author_email:
            author_name = author
        elif not author_name and author_email.find('@') < 1:
            # Without an '@' after the first character the parsed value is
            # not an address; treat the whole string as the name.
            author_name = author
            author_email = None
        if author_name == 'anonymous':
            author_name = None

        params = {'ip': ip}
        if author_name:
            params['username'] = author_name
        if author_email:
            params['email'] = author_email

        headers = {'User-Agent': self.user_agent}
        if train:
            if not author_name or not author_email:
                return None
            params['api_key'] = self.api_key
            # The report request carries the address as `ip_addr` as well.
            params['ip_addr'] = ip
            params['evidence'] = "Spam training using Trac SpamFilter " \
                                 "(%s)\n%s" \
                                 % (self.user_agent, content.encode('utf-8'))
            url = 'http://www.stopforumspam.com/add.php'
            # Passing a body makes this a POST request.
            urlreq = urllib2.Request(url, urlencode(params), headers)
        else:
            url = 'http://www.stopforumspam.com/api?confidence&f=xmldom&' + \
                  urlencode(params)
            urlreq = urllib2.Request(url, None, headers)

        resp = urllib2.urlopen(urlreq)
        try:
            return resp.read()
        finally:
            # Release the connection even if reading fails.
            resp.close()