--- /dev/null
+"""
+gerrit-stats: Generate code review stats based on Gerrit commits
+Copyright (C) 2012 Diederik van Liere, Wikimedia Foundation
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+"""
+
+import os
+from datetime import datetime
+
+class Metric(object):
+ '''
+ The Metric class
+ '''
+ def __init__(self, name, raw_query, settings):
+ self.raw_query = raw_query
+ self.name = name
+ self.query = 'ssh -p %s %s gerrit query --format=%s %s' % (settings.port, settings.host, settings.format, self.raw_query)
+
+
+class Settings(object):
+ '''
+ This object contains properties that apply to all repositories, including the queries that will be
+ run to generate the statistics, a list of repositories to ignore and a set of engineers that do not use
+ a WMF email address and hence will be classified as volunteer.
+ '''
+ def __init__(self, settings):
+ self.queries = {'only+1':'-- CodeReview+1 -CodeReview+2 -CodeReview-1 -CodeReview-2',
+ 'no_review':'-- -CodeReview+1 -CodeReview-1 -CodeReview+2 -CodeReview-2',
+ }
+ self.whitelist=set(['niklas.laxstrom@gmail.com','roan.kattouw@gmail.com','maxsem.wiki@gmail.com','s.mazeland@xs4all.nl','jeroendedauw@gmail.com','mediawiki@danielfriesen.name','jdlrobson@gmail.com','hashar@free.fr'])
+ self.ignore_repos = ['test']
+ self.metrics = {}
+ self.parents = ['mediawiki/core',
+ 'mediawiki/extensions',
+ 'operations',
+ 'analytics',
+ ]
+
+ for name, query in self.queries.iteritems():
+ self.metrics[name] = Metric(name, query, settings)
+
+ def __str__(self):
+ return 'Metrics container object'
+
+
+class Gerrit(object):
+ '''
+ This object contains the setings to interact with the gerrit server, nothing fancy these are just
+ sensible defaults.
+ '''
+ def __init__(self):
+ self.data_location = 'data'
+ self.host = 'gerrit.wikimedia.org'
+ self.port = 29418
+ self.format = 'JSON'
+
+ def __str__(self):
+ return 'Codereview settings object.'
+
+
+class Repo(object):
+ def __init__(self, name, settings, gerrit):
+ self.touched = False
+ self.name = name
+ self.dataset = {}
+ self.create_path(self.name, gerrit)
+ self.filename = ('%s.csv' % (self.determine_filename(self.name)))
+ self.filemode = self.determine_filemode(self.filename, gerrit)
+
+ self.today = datetime.today()
+ self.email = {}
+ self.email['wikimedian'] = set()
+ self.email['volunteer'] = set()
+ self.num_metrics = 0
+ for metric in settings.metrics:
+ self.dataset[metric] = {}
+ self.dataset[metric]['oldest'] = datetime(2030,1,1)
+ self.dataset[metric]['wikimedian'] = 0
+ self.dataset[metric]['volunteer'] = 0
+ self.dataset[metric]['total'] = 0
+ self.num_metrics +=1
+
+ def __str__(self):
+ return self.name
+
+ def create_path(self, filename, gerrit):
+ print filename
+ dir= os.path.dirname(filename)
+ if dir != '':
+ dir = os.path.join(gerrit.data_location, dir)
+ try:
+ os.makedirs(dir)
+ print 'Creating %s...' % dir
+ except OSError:
+ pass
+
+ def determine_filename(self, filename):
+ return os.path.basename(filename)
+
+ def determine_filemode(self, filename, settings):
+ if os.path.isfile('%s/%s' % (settings.data_location, filename)) == False:
+ return 'w'
+ else:
+ return 'a'
+
--- /dev/null
+"""\r
+gerrit-stats: Generate code review stats based on Gerrit commits
+Copyright (C) 2012 Diederik van Liere, Wikimedia Foundation\r
+\r
+This program is free software; you can redistribute it and/or\r
+modify it under the terms of the GNU General Public License\r
+as published by the Free Software Foundation; either version 2\r
+of the License, or (at your option) any later version.\r
+\r
+This program is distributed in the hope that it will be useful,\r
+but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r
+GNU General Public License for more details.\r
+\r
+You should have received a copy of the GNU General Public License\r
+along with this program; if not, write to the Free Software\r
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.\r
+"""\r
+import subprocess\r
+import json\r
+import sys\r
+import os\r
+from datetime import datetime\r
+\r
+from classes import Gerrit, Settings, Metric, Repo\r
+\r
+def create_repo_set(gerrit, settings):\r
+ repos = {}\r
+ output = run_gerrit_query('ssh -p 29418 gerrit.wikimedia.org gerrit ls-projects')\r
+ output = output.split('\n')\r
+ for repo in output:\r
+ repo = repo.strip()\r
+ if len(repo) > 1:\r
+ tests = [repo.find(ignore) == -1 for ignore in settings.ignore_repos]\r
+ if all(tests):\r
+ rp = Repo(repo, settings, gerrit)\r
+ repos[rp.name] = rp\r
+ return repos\r
+\r
+\r
+def is_wikimedian(email, whitelist):\r
+ if email in whitelist:\r
+ return True\r
+ if email.endswith('wikimedia.org'):\r
+ return True\r
+ else:\r
+ return False\r
+\r
+\r
+def set_delimiter(fields, counter):\r
+ num_fields = len(fields)\r
+ if num_fields-counter != 1:\r
+ return ','\r
+ else:\r
+ return ''\r
+\r
+def output_results(fh, *args):\r
+ args = [str(arg) for arg in args]\r
+ output = ''.join(args)\r
+ fh.write(output)\r
+ sys.stdout.write(output)\r
+\r
+def write_heading(fh, repo):\r
+ output_results(fh, 'data',',','repository',',')\r
+ #fh.write('%s,%s,' % ('date', 'repository'))\r
+ #sys.stdout.write('%s,%s,' % ('date', 'repository'))\r
+ for metric_counter, (name, metric) in enumerate(repo.dataset.iteritems()):\r
+ headings = metric.keys()\r
+ for counter, heading in enumerate(headings):\r
+ if metric_counter +1 == repo.num_metrics:\r
+ delim = set_delimiter(headings, counter)\r
+ else:\r
+ delim = ','\r
+ #fh.write('%s_%s%s' % (name, heading, delim))\r
+ #sys.stdout.write('%s_%s%s' % (name, heading, delim))\r
+ output_results(fh, name,'_', heading, delim)\r
+ fh.write('\n')\r
+ sys.stdout.write('\n')\r
+\r
+\r
+def construct_timestamp(epoch):\r
+ return datetime.fromtimestamp(epoch)\r
+\r
+\r
+def run_gerrit_query(query):\r
+ query = query.split(' ')\r
+ output = subprocess.Popen(query, shell=False, stdout=subprocess.PIPE).communicate()[0]\r
+ return output\r
+\r
+\r
+def create_dataset(repos, gerrit):\r
+ for key, repo in repos.iteritems():\r
+ fh = open('%s/%s' % (gerrit.data_location, repo.filename), repo.filemode)\r
+ if repo.filemode == 'w':\r
+ write_heading(fh, repo)\r
+ #sys.stdout.write('%s-%s-%s,%s,' % (repo.today.month,repo.today.day,repo.today.year, repo.name))\r
+ #fh.write('%s-%s-%s,%s,' % (repo.today.month,repo.today.day,repo.today.year, repo.name))\r
+ output_results(fh, repo.today.month,'-',repo.today.day,'-',repo.today.year,',',repo.name,',')\r
+ print_dict(repo, fh)\r
+ sys.stdout.write('\n*****************\n')\r
+ sys.stdout.write('\n')\r
+ fh.write('\n')\r
+ fh.close()\r
+\r
+\r
+def print_dict(repo, fh, ident = '', braces=1):\r
+ """ Recursively prints nested dictionaries."""\r
+ dataset = repo.dataset\r
+ for metric_counter, metric in enumerate(dataset):\r
+ fields = dataset[metric].keys()\r
+ for counter, field in enumerate(fields):\r
+ if metric_counter +1 == repo.num_metrics:\r
+ delim = set_delimiter(fields, counter)\r
+ else:\r
+ delim = ','\r
+ #print delim\r
+ sys.stdout.write('%s%s' % (dataset[metric][field], delim))\r
+ fh.write('%s%s' % (dataset[metric][field], delim))\r
+\r
+\r
+def cleanup_volunteers(repos, whitelist):\r
+ for name, repo in repos.iteritems():\r
+ for ws in whitelist:\r
+ if ws in repo.email['volunteer']:\r
+ repo.email['wikimedian'].add(ws)\r
+ repo.email['email']['volunteer'].remove(ws)\r
+ return repos\r
+\r
+\r
+def construct_dataset(settings, repos, metric, output, gerrit): \r
+ output=output.split('\n')\r
+ for obs in output:\r
+ try:\r
+ obs= json.loads(obs)\r
+ except ValueError, e:\r
+ print e\r
+\r
+ if isinstance(obs, dict) and 'rowCount' not in obs:\r
+ try:\r
+ project = obs['project']\r
+ except KeyError, e:\r
+ print e, obs\r
+ email = obs['owner']['email']\r
+ repo = repos.get(project, {})\r
+ if repo == {}:\r
+ continue\r
+ dt = construct_timestamp(obs['createdOn'])\r
+ \r
+ # print "REPO: %s" % repo\r
+ # print "PROJECT: %s" % project\r
+ # print "METRIC: %s" % metric\r
+ # print "DATASET: %s" % repo.dataset\r
+\r
+ if repo.dataset[metric]['oldest'] > dt:\r
+ repo.dataset[metric]['oldest'] = dt\r
+ repo.dataset[metric]['total'] +=1\r
+ if is_wikimedian(email, settings.whitelist) == True:\r
+ repo.dataset[metric]['wikimedian'] +=1\r
+ repo.email['wikimedian'].add(email)\r
+ else:\r
+ repo.dataset[metric]['volunteer'] +=1\r
+ repo.email['volunteer'].add(email)\r
+ repo.touched = True\r
+\r
+\r
+def main():\r
+ gerrit = Gerrit()\r
+ settings = Settings(gerrit)\r
+ print 'Fetching list of all gerrit repositories...'\r
+ repos = create_repo_set(gerrit, settings)\r
+ \r
+ for metric in settings.metrics.itervalues():\r
+ #query = 'ssh -p %s %s gerrit query --format=%s %s' % (gerrit.port, gerrit.host, gerrit.format, question)\r
+ output = run_gerrit_query(metric.query)\r
+ print 'Running %s' % metric.query\r
+ construct_dataset(settings, repos, metric.name, output, gerrit)\r
+\r
+ print 'Fixing miscategorization of volunteer engineers...'\r
+ repos = cleanup_volunteers(repos, settings.whitelist)\r
+ print 'Creating datasets...'\r
+ create_dataset(repos, gerrit)\r
+\r
+\r
+if __name__== '__main__':\r
+ main()
\ No newline at end of file