| 1 | ########################################################################## |
|---|
| 2 | # |
|---|
| 3 | # downloadStats.py: macro for Trac to show webalizer stats |
|---|
| 4 | # |
|---|
| 5 | # Download Stats receives one or more patterns and shows the sum of the |
|---|
| 6 | # hits of all matching entries found in the webalizer html files |
|---|
| 7 | # See http:// |
|---|
| 8 | # |
|---|
| 9 | # ==================================================================== |
|---|
| 10 | # Copyright (c) 2005 Debian-BR-CDD Team. All rights reserved. |
|---|
| 11 | # |
|---|
| 12 | # This package is free software; you can redistribute it and/or modify |
|---|
| 13 | # it under the terms of the GNU General Public License as published by |
|---|
| 14 | # the Free Software Foundation; version 2 dated June, 1991. |
|---|
| 15 | # |
|---|
| 16 | # This package is distributed in the hope that it will be useful, |
|---|
| 17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of |
|---|
| 18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|---|
| 19 | # GNU General Public License for more details. |
|---|
| 20 | # |
|---|
| 21 | # You should have received a copy of the GNU General Public License |
|---|
| 22 | # along with this package; if not, write to the Free Software |
|---|
| 23 | # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA |
|---|
| 24 | # 02111-1307, USA. |
|---|
| 25 | # |
|---|
| 26 | ######################################################################### |
|---|
| 27 | # Authors: Tiago Bortoletto Vaz <tiago@debian-ba.org> |
|---|
| 28 | # Otavio Salvador <otavio@debian.org> |
|---|
| 29 | |
|---|
| 30 | import re |
|---|
| 31 | import os |
|---|
| 32 | |
|---|
def uniq(alist):
    """
    Return the distinct items of alist as a list.

    in: an iterable of hashable items
    out: a list holding each distinct item exactly once; callers must not
         rely on its order (on Python 3.7+ it is first-seen order)
    """
    # dict.fromkeys() deduplicates in a single pass and works the same on
    # Python 2 and Python 3.  The former map(d.__setitem__, alist, []) trick
    # depended on Python 2's map() padding the shorter sequence with None;
    # on Python 3 map() is lazy and stops at the shortest input, so nothing
    # was ever stored and the result was always empty.
    return list(dict.fromkeys(alist))
|---|
| 37 | |
|---|
def getHitsFromFile(file,pattern,webalizer_path):
    """
    Collect the hit counters of the URL rows found in one webalizer file.

    in: file           - name of the html webalizer file, relative to
                         webalizer_path
        pattern        - plain substring (not a regex) a row must contain;
                         an empty/None pattern falls back to 'http', which
                         every row matched below contains
        webalizer_path - directory holding the file, with or without a
                         trailing path separator
    out: a list of hits (int), one item for each distinct matching row
    """
    pattern = pattern or 'http'

    # os.path.join copes with a path given without a trailing slash (the form
    # used in the documented example "/var/www/webalizer|file.iso"); plain
    # string concatenation glued the directory and file names together.
    report_path = os.path.join(webalizer_path, file)
    with open(report_path, 'r') as report:
        content = report.read()

    # One table row: the '>' closing the preceding cell, two
    # "<B>number</B>" / "percentage" column pairs, then the cell with the
    # http:// link, followed by the "<TR>" that opens the next row.
    # The percentage accepts both ',' and '.' as decimal separator so that
    # reports written under either locale convention are recognised.
    # The lines are joined explicitly so that re-indenting this function can
    # never leak whitespace into the pattern.
    row_regex = "\n".join((
        r'>',
        r'<TD ALIGN=right><FONT SIZE="-1"><B>[0-9]+</B></FONT></TD>',
        r'<TD ALIGN=right><FONT SIZE="-2">[0-9]+[.,][0-9]+%</FONT></TD>',
        r'<TD ALIGN=right><FONT SIZE="-1"><B>[0-9]+</B></FONT></TD>',
        r'<TD ALIGN=right><FONT SIZE="-2">[0-9]+[.,][0-9]+%</FONT></TD>',
        r'<TD ALIGN=left NOWRAP><FONT SIZE="-1"><A HREF="http://.*</A></FONT></TD></TR>',
        r'<TR>',
        r'',
    ))
    rows = re.findall(row_regex, content)

    hits = []
    # dict.fromkeys() drops rows that appear more than once in the file so
    # that identical rows are only counted a single time.
    for row in dict.fromkeys(rows):
        if pattern in row:
            # The first bold number of the row is the one that is summed
            # (presumably webalizer's "Hits" column -- confirm against a
            # real report if the layout ever changes).
            counter = re.search(r"<B>([0-9]+)</B>", row)
            hits.append(int(counter.group(1)))
    return hits
|---|
| 61 | |
|---|
def getHitsFromAll(args):
    """
    Sum the hits of every given pattern over all webalizer usage reports.

    in: a string to be split using '|' as delimiter, where the first field
        is the webalizer path and the others are patterns that should be
        matched. Ex. /var/www/webalizer|file.iso|file2.raw
    out: the sum of the hits of all matched patterns (0 when no pattern is
         given or nothing matches); a row matched by several patterns is
         counted once per pattern
    """
    fields = args.split('|')
    webalizer_path = fields[0]
    patterns = fields[1:]

    # Only the usage_<digits>.html reports are scanned (webalizer names its
    # per-month pages usage_YYYYMM.html).  The former '".html" in name' test
    # also picked up leftovers such as "usage_200501.html.bak" or
    # "page.html~".
    report_name = re.compile(r"usage_[0-9]+\.html\Z")

    all_hits = 0
    for name in os.listdir(webalizer_path):
        if not report_name.match(name):
            continue
        for pattern in patterns:
            all_hits += sum(getHitsFromFile(name, pattern, webalizer_path))
    return all_hits
|---|
| 80 | |
|---|
def execute(hdf,args,env):
    """
    Macro entry point: return the summed hit count as a string.

    in: hdf and env are accepted but not used by this function; args is the
        '|'-delimited string that is handed unchanged to getHitsFromAll
    out: the total computed by getHitsFromAll, converted with str()
    """
    total_hits = getHitsFromAll(args)
    return str(total_hits)
|---|