#!/usr/bin/env python
# copyright 2007 Francesco Crippa
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
"""Build a per-month, per-country CSV of Fedora Ambassadors.

Every revision of the Ambassadors/CountryList wiki page is downloaded twice:
once rendered (to read the modification date) and once as raw wiki markup (to
count the bullet entries under each country heading).  The counts of the last
revision seen for each month are written to ``ambassadors_country_list.csv``.

The code runs unchanged on Python 2.7 and Python 3.
"""

import io
import re
import sys
import time

try:
    from urllib import urlencode  # Python 2
except ImportError:
    from urllib.parse import urlencode  # Python 3

URL = "http://fedoraproject.org/wiki/Ambassadors/CountryList"
OUTPUT_FILE = "ambassadors_country_list.csv"

# Inclusive range of wiki revisions to download.
FIRST_REVISION = 1
LAST_REVISION = 662

# Optional politeness pause: sleep PAUSE_SECONDS after every PAUSE_EVERY
# revisions.  0 disables it, which matches the script's historical behaviour
# (the pause only ever existed as commented-out code).
PAUSE_EVERY = 0
PAUSE_SECONDS = 100

_HEADING_PREFIX = "=== "
_BULLET_PREFIX = " * "
_DATE_RE = re.compile(
    r"([0-9]+)[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])"
)


def _to_text(value):
    """Return *value* as text, decoding UTF-8 bytes (bad bytes are replaced)."""
    if isinstance(value, bytes):
        return value.decode("utf-8", "replace")
    return value


def _csv_line(cells):
    """Join already comma-terminated *cells* into one output line.

    The layout ("cell, cell, " followed by a newline, including the trailing
    space) is exactly what the original Python 2 ``print x,`` statements
    produced, so existing consumers of the file keep working.
    """
    return " ".join([_to_text(cell) for cell in cells]) + " \n"


def find_world_region(data):
    """Count the bullet entries listed under each country heading.

    *data* is raw wiki markup.  A line starting with ``"=== "`` opens a new
    country (the ``===`` markers are removed and the name is title-cased); a
    line starting with ``" * "`` counts as one entry for the current country.
    Entries seen before the first heading are counted under "Not Defined".

    Returns a dict mapping country name to its number of entries.  A heading
    that appears twice restarts that country's count at zero.
    """
    country = "Not Defined"
    counts = {country: 0}
    for line in data.split("\n"):
        if line.startswith(_HEADING_PREFIX):
            country = line.replace("===", "").strip().title()
            counts[country] = 0
        elif line.startswith(_BULLET_PREFIX):
            counts[country] += 1
    return counts


def print_stats(counts):
    """Write *counts* to ``ambassadors_country_list.csv``.

    *counts* maps a date label to a dict of per-country totals, e.g.
    ``counts["2008-01"]["Italy"] == 5``.

    The header row lists every country found in any month (sorted), and each
    following row holds one month (sorted by date label) with a value in every
    column; a country missing from a month is written as 0.  This keeps the
    columns aligned even when the set of countries changes between months.
    ``sys.stdout`` is not touched.
    """
    countries = sorted(
        {country for per_month in counts.values() for country in per_month}
    )
    with io.open(OUTPUT_FILE, "w", encoding="utf-8") as out:
        header = ["FedoraAmbassadrs,"]
        header.extend("%s," % country for country in countries)
        out.write(_csv_line(header))
        for date in sorted(counts):
            per_month = counts[date]
            row = ["%s," % date]
            row.extend("%d," % per_month.get(country, 0) for country in countries)
            out.write(_csv_line(row))


def find_page_date(page):
    """Return the first date found in *page* as ``(year, month, day)`` strings.

    A date is a run of digits, a separator (``-``, space, ``/`` or ``.``), a
    two-digit month 01-12, a separator and a two-digit day 01-31.

    Raises IndexError when *page* contains no such date.
    """
    match = _DATE_RE.search(page)
    if match is None:
        raise IndexError("no date found in page")
    return match.groups()


def main():
    """Download every revision and write the CSV report."""
    # Imported here rather than at module level so the parsing and reporting
    # helpers above stay importable on machines without pycurl/libcurl.
    import pycurl

    print("Welcome to Stats generator for Fedora Ambassadors")
    page = pycurl.Curl()
    try:
        print(" * Start configuration")
        page.setopt(pycurl.URL, URL)
        page.setopt(pycurl.FOLLOWLOCATION, 1)
        page.setopt(pycurl.USERAGENT, "Mozilla/4.0")
        page.setopt(pycurl.MAXREDIRS, 5)
        page.setopt(pycurl.VERBOSE, 0)

        def fetch(fields):
            """POST *fields* to the wiki page and return the response text."""
            # A fresh buffer per request: sharing one buffer would append the
            # raw markup to the previously downloaded rendered page.
            body = io.BytesIO()
            page.setopt(pycurl.WRITEFUNCTION, body.write)
            page.setopt(pycurl.POSTFIELDS, urlencode(fields))
            page.perform()
            return _to_text(body.getvalue())

        print(" * Downloading pages")
        counts = {}
        for rev in range(FIRST_REVISION, LAST_REVISION + 1):
            if PAUSE_EVERY and (rev - FIRST_REVISION + 1) % PAUSE_EVERY == 0:
                print(" - I'm waiting some minutes...")
                time.sleep(PAUSE_SECONDS)

            sys.stdout.write(" - Revision %d " % rev)
            sys.stdout.flush()
            try:
                year, month, day = find_page_date(fetch({"rev": rev}))
            except IndexError:
                # Without a date the revision cannot be assigned to a month;
                # skip it instead of losing everything downloaded so far.
                print("(no modification date found, skipped)")
                continue
            print("(Modified on %s-%s-%s)" % (year, month, day))

            raw = fetch({"rev": rev, "action": "raw"})
            # Later revisions overwrite earlier ones of the same month, so
            # each month ends up with its last revision's counts.
            counts[year + "-" + month] = find_world_region(raw)
    finally:
        page.close()

    print_stats(counts)


if __name__ == "__main__":
    main()