|
1 #!/usr/bin/env python |
|
2 # -*- coding: utf-8 -*- |
|
3 |
|
4 import os, sys |
|
5 sys.path.append(os.path.dirname(__file__)) |
|
6 import dbutil |
|
7 |
|
# Open a connection through the project-local dbutil helper and get a cursor.
conn = dbutil.connect()
c = conn.cursor()

# Select the submitter_name column of every row in comments_comment; the
# result rows are read later via c.fetchall().
c.execute('''select submitter_name from comments_comment''')

# Credited name -> number of comments; filled in by the tally loop below.
reviewers = {}

# Hand-maintained corrections, looked up by the *lowercased* submitter name.
# Each value is either the spelling to credit (fixing typos, missing accents,
# nicknames, embedded e-mail addresses, ...) or u'Anonymous' for submitters
# who are not credited by name.
mappings = {
    u'alejandro "tab-lover" dubrovsky': u'Alejandro Dubrovsky',
    u'alex hirzel <ahirzel@mtu.edu>': u'Alex Hirzel',
    u'anonymous coward': u'Anonymous',
    u'arthur van leeuwen': u'Arthur van Leeuwen',
    u'augustss': u'Lennart Augustsson',
    u'ed t': u'Anonymous',
    u'geogre moschovitis': u'George Moschovitis',
    u'george m': u'George Moschovitis',
    u'haskell newb': u'Anonymous',
    u'j. pablo fernandez': u'J. Pablo Fernández',
    u'kamal al-marhoobi': u'Kamal Al-Marhubi',
    u'kevin w.': u'Kevin Watters',
    u'max cantor (#haskell - mxc)': u'Max Cantor',
    u'michael campbell': u'Michael Campbell',
    u'mike btauwerman': u'Mike Brauwerman',
    u'no credit necessary': u'Anonymous',
    u'nykänen, matti': u'Matti Nykänen',
    u'omar antolin camarena': u'Omar Antolín Camarena',
    u'ryan t mulligan': u'Ryan T. Mulligan',
    u'sengan baring-gould': u'Sengan Baring-Gould',
    u'some guy': u'Anonymous',
    u'tomas janousek': u'Tomáš Janoušek',
    u'william halchin': u'William N. Halchin',
    }
|
40 |
|
def fixup(s):
    """Return ``s`` encoded as ASCII, with non-ASCII characters escaped.

    ASCII characters pass through unchanged.  Every other character is
    replaced by a decimal numeric character reference of the form
    ``&#NNN;`` (for example ``u'\\xe1'`` becomes ``&#225;``).

    The result is always the encoded (byte string) form, whether or not
    anything had to be escaped, so callers see a single return type.
    """
    # The codec's built-in 'xmlcharrefreplace' error handler produces the
    # same ``&#NNN;`` references a per-character fallback would, without
    # the try/except and without a second, differently-typed return path.
    return s.encode('ascii', 'xmlcharrefreplace')
|
51 |
|
52 total = 0 |
|
53 for r in c.fetchall(): |
|
54 r = r[0].decode('utf-8') |
|
55 if r in ("Bryan O'Sullivan",): |
|
56 continue |
|
57 total += 1 |
|
58 m = mappings.get(r.lower()) |
|
59 if m: |
|
60 r = m |
|
61 elif len(r) < 2 or ' ' not in r: |
|
62 r = 'Anonymous' |
|
63 reviewers.setdefault(r, 0) |
|
64 reviewers[r] += 1 |
|
65 |
|
66 reviewers = sorted(reviewers.iteritems(), key=lambda x: x[0]) |
|
67 |
|
68 cohorts = [(.01,1),(.002,.01)] |
|
69 |
|
70 for (lo,hi) in cohorts: |
|
71 lo = total * lo |
|
72 hi = total * hi |
|
73 for r in [n for n in reviewers if lo <= n[1] < hi]: |
|
74 if r[1] > 3: |
|
75 print '%s,' % fixup(r[0]) |
|
76 print |
|
77 |
|
78 lo = total * .002 |
|
79 for n in reviewers: |
|
80 if n[1] < lo: |
|
81 print '%s,' % fixup(n[0]) |