From 00627fd573f921f0db5058c70df93d833f34dad3 Mon Sep 17 00:00:00 2001 From: dsc Date: Fri, 25 May 2012 11:09:15 -0700 Subject: [PATCH] Adds Python packaging and updates README. --- README.md | 81 +++++++++++++++++---- classes.py | 133 --------------------------------- gerritstats/__init__.py | 31 ++++++++ gerritstats/classes.py | 133 +++++++++++++++++++++++++++++++++ gerritstats/stats.py | 187 +++++++++++++++++++++++++++++++++++++++++++++++ setup.py | 51 +++++++++++++ stats.py | 187 ----------------------------------------------- 7 files changed, 469 insertions(+), 334 deletions(-) delete mode 100644 classes.py create mode 100755 gerritstats/__init__.py create mode 100644 gerritstats/classes.py create mode 100755 gerritstats/stats.py create mode 100644 setup.py delete mode 100644 stats.py diff --git a/README.md b/README.md index a85f0ba..cd6fa46 100644 --- a/README.md +++ b/README.md @@ -1,23 +1,76 @@ # Gerrit Code-Review Stats + +`gerrit-stats` generates code-review stats based on Gerrit commits. + + +## Installation + +This package is not in PyPI, as it is WMF-specific. Install it by cloning the repo, and then using `pip` or `easy_install`: + +```sh +git clone git@less.ly:gerrit-stats.git +pip install gerrit-stats +``` + + +## Workflow + This initial version generates two metrics on a day-by-day basis: -1) Number of commits that have been completely untouched, no -1, -2, +1, +2 -2) Number of commits with only +1 -Unfortunately, it proves (AFAIK) impossible to determine the number of commits per day. Gerrit's search is so broken that it is not even funny. The NOT operator does not work and you cannot use any logic when querying for is: +1. Number of commits that have been completely untouched (no `-1`, `-2`, `+1`, `+2` rating) +2. Number of commits with only `+1` + +Unfortunately, it proves (AFAIK) impossible to determine the number of commits per day. Gerrit's +search is so broken that it is not even funny. 
The NOT operator does not work and you cannot use any +logic when querying for `is:`. + +This means that it is hard to put these numbers in context. + +1. The scripts will query all Gerrit repos, except for the repos that are mentioned in the + `settings.ignore_repos` variable. Currently, repos containing the word 'test' are ignored. +2. Queries are run using the SSH interface; each repo has its own dataset. If the file does not + exist, a new file is created with a header line; if the file already exists, the new + observation is appended. + + +## Roadmap + +### Todo + +- This script needs to be puppetized and launched from a cronjob every day at 11:59PM UTC. + Andrew, maybe you can have a look at this next week? +- We need to figure out a way to get total commits per day. +- Right now each repository has its own dataset, but we need to add functionality in Limn to + make logical groups of datasets where the values are summed. + + +### Known Issues + +- Adding a new metric to existing datasets might not work because of the mismatch between number + of headings and number of fields. +- To run gerrit-stats, you need an active Gerrit SSH account. + + +## Feedback + +Bugs? Feature requests? Feedback? Contact [Diederik van Liere](mailto:dvanliere@wikimedia.org). + -This means that it is hard to put these numbers in context. +## License -Workflow: -1) The scripts will query all gerrit repos, except for the repos that are mentioned in the settings.ignore_repos variable. Currently repos containing the word 'test' are ignored -2) Queries are run using the SSH interface, each repo has it's own dataset. If the file does not exist a new file is created with a header line, if the file already exists then the new observation is appended. +Copyright (C) 2012 Diederik van Liere, Wikimedia Foundation -Todolist: -1) This script needs to be puppetized and launched from a cronjob every day at 11:59PM UTC. Andrew, maybe you can have a look at this next week? 
-2) We need to figure out a way to get total commits per day -3) RIght now each repository has it's own dataset but we need to add functionality in Limn to make logical groups of datasets where the values are summed. +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. -Known Issues -1) Adding a new metric to existing datasets might not work because of the mismatch between number of headings and number of fields. -2) To run gerrit-stats you need an active gerrit ssh account. +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. diff --git a/classes.py b/classes.py deleted file mode 100644 index 51be9fd..0000000 --- a/classes.py +++ /dev/null @@ -1,133 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -""" -gerrit-stats: Generate codereview stats based from Gerrit commits -Copyright (C) 2012 Diederik van Liere, Wikimedia Foundation - -This program is free software; you can redistribute it and/or -modify it under the terms of the GNU General Public License -as published by the Free Software Foundation; either version 2 -of the License, or (at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -""" - -import os -from datetime import datetime - -class Metric(object): - ''' - The Metric class - ''' - def __init__(self, name, raw_query, settings): - self.raw_query = raw_query - self.name = name - self.query = 'ssh -p %s %s gerrit query --format=%s %s' % (settings.port, settings.host, settings.format, self.raw_query) - - -class Settings(object): - ''' - This object contains properties that apply to all repositories, including the queries that will be - run to generate the statistics, a list of repositories to ignore and a set of engineers that do not use - a WMF email address and hence will be classified as volunteer. - ''' - def __init__(self, settings): - self.queries = { - 'only+1' : '-- CodeReview+1 -CodeReview+2 -CodeReview-1 -CodeReview-2', - 'no_review' : '-- -CodeReview+1 -CodeReview-1 -CodeReview+2 -CodeReview-2', - } - self.whitelist = set([ - 'niklas.laxstrom@gmail.com', - 'roan.kattouw@gmail.com', - 'maxsem.wiki@gmail.com', - 's.mazeland@xs4all.nl', - 'jeroendedauw@gmail.com', - 'mediawiki@danielfriesen.name', - 'jdlrobson@gmail.com', - 'hashar@free.fr' - ]) - self.ignore_repos = ['test'] - self.metrics = {} - self.parents = [ - 'mediawiki/core', - 'mediawiki/extensions', - 'operations', - 'analytics', - ] - - for name, query in self.queries.iteritems(): - self.metrics[name] = Metric(name, query, settings) - - def __str__(self): - return 'Metrics container object' - - -class Gerrit(object): - ''' - This object contains the setings to interact with the gerrit server, nothing fancy these are just - sensible defaults. - ''' - def __init__(self): - self.data_location = 'data' - self.host = 'gerrit.wikimedia.org' - self.port = 29418 - self.format = 'JSON' - - def __str__(self): - return 'Codereview settings object.' 
- - -class Repo(object): - - def __init__(self, name, settings, gerrit): - self.touched = False - self.name = name - self.dataset = {} - self.create_path(self.name, gerrit) - self.filename = ('%s.csv' % (self.determine_filename(self.name))) - self.filemode = self.determine_filemode(self.filename, gerrit) - - self.today = datetime.today() - self.email = {} - self.email['wikimedian'] = set() - self.email['volunteer'] = set() - self.num_metrics = 0 - - for metric in settings.metrics: - self.dataset[metric] = {} - self.dataset[metric]['oldest'] = datetime(2030,1,1) - self.dataset[metric]['wikimedian'] = 0 - self.dataset[metric]['volunteer'] = 0 - self.dataset[metric]['total'] = 0 - self.num_metrics +=1 - - def __str__(self): - return self.name - - def create_path(self, filename, gerrit): - print filename - dir= os.path.dirname(filename) - if dir != '': - dir = os.path.join(gerrit.data_location, dir) - try: - os.makedirs(dir) - print 'Creating %s...' % dir - except OSError: - pass - - def determine_filename(self, filename): - return os.path.basename(filename) - - def determine_filemode(self, filename, settings): - if os.path.isfile('%s/%s' % (settings.data_location, filename)) == False: - return 'w' - else: - return 'a' - diff --git a/gerritstats/__init__.py b/gerritstats/__init__.py new file mode 100755 index 0000000..57a582b --- /dev/null +++ b/gerritstats/__init__.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +gerrit-stats: Generate codereview stats based from Gerrit commits +Copyright (C) 2012 Diederik van Liere, Wikimedia Foundation + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +""" + +__version__ = '0.1.0' +VERSION = tuple(map(int, __version__.split('.'))) + +from classes import Gerrit, Settings, Metric, Repo +from stats import main + + +if __name__== '__main__': + main() + diff --git a/gerritstats/classes.py b/gerritstats/classes.py new file mode 100644 index 0000000..51be9fd --- /dev/null +++ b/gerritstats/classes.py @@ -0,0 +1,133 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +gerrit-stats: Generate codereview stats based from Gerrit commits +Copyright (C) 2012 Diederik van Liere, Wikimedia Foundation + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+""" + +import os +from datetime import datetime + +class Metric(object): + ''' + The Metric class + ''' + def __init__(self, name, raw_query, settings): + self.raw_query = raw_query + self.name = name + self.query = 'ssh -p %s %s gerrit query --format=%s %s' % (settings.port, settings.host, settings.format, self.raw_query) + + +class Settings(object): + ''' + This object contains properties that apply to all repositories, including the queries that will be + run to generate the statistics, a list of repositories to ignore and a set of engineers that do not use + a WMF email address and hence will be classified as volunteer. + ''' + def __init__(self, settings): + self.queries = { + 'only+1' : '-- CodeReview+1 -CodeReview+2 -CodeReview-1 -CodeReview-2', + 'no_review' : '-- -CodeReview+1 -CodeReview-1 -CodeReview+2 -CodeReview-2', + } + self.whitelist = set([ + 'niklas.laxstrom@gmail.com', + 'roan.kattouw@gmail.com', + 'maxsem.wiki@gmail.com', + 's.mazeland@xs4all.nl', + 'jeroendedauw@gmail.com', + 'mediawiki@danielfriesen.name', + 'jdlrobson@gmail.com', + 'hashar@free.fr' + ]) + self.ignore_repos = ['test'] + self.metrics = {} + self.parents = [ + 'mediawiki/core', + 'mediawiki/extensions', + 'operations', + 'analytics', + ] + + for name, query in self.queries.iteritems(): + self.metrics[name] = Metric(name, query, settings) + + def __str__(self): + return 'Metrics container object' + + +class Gerrit(object): + ''' + This object contains the setings to interact with the gerrit server, nothing fancy these are just + sensible defaults. + ''' + def __init__(self): + self.data_location = 'data' + self.host = 'gerrit.wikimedia.org' + self.port = 29418 + self.format = 'JSON' + + def __str__(self): + return 'Codereview settings object.' 
+ + +class Repo(object): + + def __init__(self, name, settings, gerrit): + self.touched = False + self.name = name + self.dataset = {} + self.create_path(self.name, gerrit) + self.filename = ('%s.csv' % (self.determine_filename(self.name))) + self.filemode = self.determine_filemode(self.filename, gerrit) + + self.today = datetime.today() + self.email = {} + self.email['wikimedian'] = set() + self.email['volunteer'] = set() + self.num_metrics = 0 + + for metric in settings.metrics: + self.dataset[metric] = {} + self.dataset[metric]['oldest'] = datetime(2030,1,1) + self.dataset[metric]['wikimedian'] = 0 + self.dataset[metric]['volunteer'] = 0 + self.dataset[metric]['total'] = 0 + self.num_metrics +=1 + + def __str__(self): + return self.name + + def create_path(self, filename, gerrit): + print filename + dir= os.path.dirname(filename) + if dir != '': + dir = os.path.join(gerrit.data_location, dir) + try: + os.makedirs(dir) + print 'Creating %s...' % dir + except OSError: + pass + + def determine_filename(self, filename): + return os.path.basename(filename) + + def determine_filemode(self, filename, settings): + if os.path.isfile('%s/%s' % (settings.data_location, filename)) == False: + return 'w' + else: + return 'a' + diff --git a/gerritstats/stats.py b/gerritstats/stats.py new file mode 100755 index 0000000..242974c --- /dev/null +++ b/gerritstats/stats.py @@ -0,0 +1,187 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +gerrit-stats: Generate codereview stats based from Gerrit commits +Copyright (C) 2012 Diederik van Liere, Wikimedia Foundation + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +""" +import subprocess +import json +import sys +import os +from datetime import datetime + +from classes import Gerrit, Settings, Metric, Repo + +def create_repo_set(gerrit, settings): + repos = {} + output = run_gerrit_query('ssh -p 29418 gerrit.wikimedia.org gerrit ls-projects') + output = output.split('\n') + for repo in output: + repo = repo.strip() + if len(repo) > 1: + tests = [repo.find(ignore) == -1 for ignore in settings.ignore_repos] + if all(tests): + rp = Repo(repo, settings, gerrit) + repos[rp.name] = rp + return repos + + +def is_wikimedian(email, whitelist): + if email in whitelist: + return True + if email.endswith('wikimedia.org'): + return True + else: + return False + + +def set_delimiter(fields, counter): + num_fields = len(fields) + if num_fields-counter != 1: + return ',' + else: + return '' + +def output_results(fh, *args): + args = [str(arg) for arg in args] + output = ''.join(args) + fh.write(output) + sys.stdout.write(output) + +def write_heading(fh, repo): + output_results(fh, 'data',',','repository',',') + #fh.write('%s,%s,' % ('date', 'repository')) + #sys.stdout.write('%s,%s,' % ('date', 'repository')) + for metric_counter, (name, metric) in enumerate(repo.dataset.iteritems()): + headings = metric.keys() + for counter, heading in enumerate(headings): + if metric_counter +1 == repo.num_metrics: + delim = set_delimiter(headings, counter) + else: + delim = ',' + #fh.write('%s_%s%s' % (name, heading, delim)) + #sys.stdout.write('%s_%s%s' % (name, heading, delim)) + output_results(fh, name,'_', 
heading, delim) + fh.write('\n') + sys.stdout.write('\n') + + +def construct_timestamp(epoch): + return datetime.fromtimestamp(epoch) + + +def run_gerrit_query(query): + query = query.split(' ') + output = subprocess.Popen(query, shell=False, stdout=subprocess.PIPE).communicate()[0] + return output + + +def create_dataset(repos, gerrit): + for key, repo in repos.iteritems(): + fh = open('%s/%s' % (gerrit.data_location, repo.filename), repo.filemode) + if repo.filemode == 'w': + write_heading(fh, repo) + #sys.stdout.write('%s-%s-%s,%s,' % (repo.today.month,repo.today.day,repo.today.year, repo.name)) + #fh.write('%s-%s-%s,%s,' % (repo.today.month,repo.today.day,repo.today.year, repo.name)) + output_results(fh, repo.today.month,'-',repo.today.day,'-',repo.today.year,',',repo.name,',') + print_dict(repo, fh) + sys.stdout.write('\n*****************\n') + sys.stdout.write('\n') + fh.write('\n') + fh.close() + + +def print_dict(repo, fh, ident = '', braces=1): + """ Recursively prints nested dictionaries.""" + dataset = repo.dataset + for metric_counter, metric in enumerate(dataset): + fields = dataset[metric].keys() + for counter, field in enumerate(fields): + if metric_counter +1 == repo.num_metrics: + delim = set_delimiter(fields, counter) + else: + delim = ',' + #print delim + sys.stdout.write('%s%s' % (dataset[metric][field], delim)) + fh.write('%s%s' % (dataset[metric][field], delim)) + + +def cleanup_volunteers(repos, whitelist): + for name, repo in repos.iteritems(): + for ws in whitelist: + if ws in repo.email['volunteer']: + repo.email['wikimedian'].add(ws) + repo.email['email']['volunteer'].remove(ws) + return repos + + +def construct_dataset(settings, repos, metric, output, gerrit): + output = output.split('\n') + for obs in output: + try: + obs= json.loads(obs) + except ValueError, e: + print e + + if isinstance(obs, dict) and 'rowCount' not in obs: + try: + project = obs['project'] + except KeyError, e: + print e, obs + email = obs['owner']['email'] + repo = 
repos.get(project, {}) + if repo == {}: + continue + dt = construct_timestamp(obs['createdOn']) + + # print "REPO: %s" % repo + # print "PROJECT: %s" % project + # print "METRIC: %s" % metric + # print "DATASET: %s" % repo.dataset + + if repo.dataset[metric]['oldest'] > dt: + repo.dataset[metric]['oldest'] = dt + repo.dataset[metric]['total'] +=1 + if is_wikimedian(email, settings.whitelist) == True: + repo.dataset[metric]['wikimedian'] +=1 + repo.email['wikimedian'].add(email) + else: + repo.dataset[metric]['volunteer'] +=1 + repo.email['volunteer'].add(email) + repo.touched = True + + +def main(): + gerrit = Gerrit() + settings = Settings(gerrit) + print 'Fetching list of all gerrit repositories...\n' + repos = create_repo_set(gerrit, settings) + + for metric in settings.metrics.itervalues(): + #query = 'ssh -p %s %s gerrit query --format=%s %s' % (gerrit.port, gerrit.host, gerrit.format, question) + output = run_gerrit_query(metric.query) + print 'Running %s' % metric.query + construct_dataset(settings, repos, metric.name, output, gerrit) + + print 'Fixing miscategorization of volunteer engineers...' + repos = cleanup_volunteers(repos, settings.whitelist) + print 'Creating datasets...' 
+ create_dataset(repos, gerrit) + + +if __name__== '__main__': + main() diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..bb4fbaa --- /dev/null +++ b/setup.py @@ -0,0 +1,51 @@ +#!python +# -*- coding: utf-8 -*- +import sys, os, re +from os.path import dirname, abspath, join +from setuptools import setup, find_packages + +HERE = abspath(dirname(__file__)) +readme = open(join(HERE, 'README.md'), 'rU').read() + +package_file = open(join(HERE, 'gerritstats/__init__.py'), 'rU') +__version__ = re.sub( + r".*\b__version__\s+=\s+'([^']+)'.*", + r'\1', + [ line.strip() for line in package_file if '__version__' in line ].pop(0) +) + + +setup( + name = 'gerrit-stats', + version = __version__, + description = 'Generate codereview stats based from Gerrit commits', + long_description = readme, + url = 'http://git.less.ly/?p=gerrit-stats.git', + + author = 'Diederik van Liere', + author_email = 'dvanliere@wikimedia.org', + + packages = find_packages(), + entry_points = { 'console_scripts':['gerrit-stats = gerritstats:main'] }, + + # install_requires = [ + # "bunch >= 1.0", + # "PyYAML >= 3.10", + # ], + + keywords = ['gerrit', 'stats'], + classifiers = [ + "Development Status :: 4 - Beta", + "Environment :: Console", + "Intended Audience :: Developers", + "Topic :: Utilities" + "Topic :: Software Development :: Libraries :: Python Modules", + "Programming Language :: Python", + "Programming Language :: Python :: 2.6", + "Programming Language :: Python :: 2.7", + "Operating System :: OS Independent", + "License :: OSI Approved :: GPLv2 License", + ], + zip_safe = False, + license = "GPLv2", +) diff --git a/stats.py b/stats.py deleted file mode 100644 index fca1ad5..0000000 --- a/stats.py +++ /dev/null @@ -1,187 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -""" -gerrit-stats: Generate codereview stats based from Gerrit commits -Copyright (C) 2012 Diederik van Liere, Wikimedia Foundation - -This program is free software; you can redistribute it and/or 
-modify it under the terms of the GNU General Public License -as published by the Free Software Foundation; either version 2 -of the License, or (at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -""" -import subprocess -import json -import sys -import os -from datetime import datetime - -from classes import Gerrit, Settings, Metric, Repo - -def create_repo_set(gerrit, settings): - repos = {} - output = run_gerrit_query('ssh -p 29418 gerrit.wikimedia.org gerrit ls-projects') - output = output.split('\n') - for repo in output: - repo = repo.strip() - if len(repo) > 1: - tests = [repo.find(ignore) == -1 for ignore in settings.ignore_repos] - if all(tests): - rp = Repo(repo, settings, gerrit) - repos[rp.name] = rp - return repos - - -def is_wikimedian(email, whitelist): - if email in whitelist: - return True - if email.endswith('wikimedia.org'): - return True - else: - return False - - -def set_delimiter(fields, counter): - num_fields = len(fields) - if num_fields-counter != 1: - return ',' - else: - return '' - -def output_results(fh, *args): - args = [str(arg) for arg in args] - output = ''.join(args) - fh.write(output) - sys.stdout.write(output) - -def write_heading(fh, repo): - output_results(fh, 'data',',','repository',',') - #fh.write('%s,%s,' % ('date', 'repository')) - #sys.stdout.write('%s,%s,' % ('date', 'repository')) - for metric_counter, (name, metric) in enumerate(repo.dataset.iteritems()): - headings = metric.keys() - for counter, heading in enumerate(headings): - if metric_counter +1 == repo.num_metrics: - delim = 
set_delimiter(headings, counter) - else: - delim = ',' - #fh.write('%s_%s%s' % (name, heading, delim)) - #sys.stdout.write('%s_%s%s' % (name, heading, delim)) - output_results(fh, name,'_', heading, delim) - fh.write('\n') - sys.stdout.write('\n') - - -def construct_timestamp(epoch): - return datetime.fromtimestamp(epoch) - - -def run_gerrit_query(query): - query = query.split(' ') - output = subprocess.Popen(query, shell=False, stdout=subprocess.PIPE).communicate()[0] - return output - - -def create_dataset(repos, gerrit): - for key, repo in repos.iteritems(): - fh = open('%s/%s' % (gerrit.data_location, repo.filename), repo.filemode) - if repo.filemode == 'w': - write_heading(fh, repo) - #sys.stdout.write('%s-%s-%s,%s,' % (repo.today.month,repo.today.day,repo.today.year, repo.name)) - #fh.write('%s-%s-%s,%s,' % (repo.today.month,repo.today.day,repo.today.year, repo.name)) - output_results(fh, repo.today.month,'-',repo.today.day,'-',repo.today.year,',',repo.name,',') - print_dict(repo, fh) - sys.stdout.write('\n*****************\n') - sys.stdout.write('\n') - fh.write('\n') - fh.close() - - -def print_dict(repo, fh, ident = '', braces=1): - """ Recursively prints nested dictionaries.""" - dataset = repo.dataset - for metric_counter, metric in enumerate(dataset): - fields = dataset[metric].keys() - for counter, field in enumerate(fields): - if metric_counter +1 == repo.num_metrics: - delim = set_delimiter(fields, counter) - else: - delim = ',' - #print delim - sys.stdout.write('%s%s' % (dataset[metric][field], delim)) - fh.write('%s%s' % (dataset[metric][field], delim)) - - -def cleanup_volunteers(repos, whitelist): - for name, repo in repos.iteritems(): - for ws in whitelist: - if ws in repo.email['volunteer']: - repo.email['wikimedian'].add(ws) - repo.email['email']['volunteer'].remove(ws) - return repos - - -def construct_dataset(settings, repos, metric, output, gerrit): - output = output.split('\n') - for obs in output: - try: - obs= json.loads(obs) - except 
ValueError, e: - print e - - if isinstance(obs, dict) and 'rowCount' not in obs: - try: - project = obs['project'] - except KeyError, e: - print e, obs - email = obs['owner']['email'] - repo = repos.get(project, {}) - if repo == {}: - continue - dt = construct_timestamp(obs['createdOn']) - - # print "REPO: %s" % repo - # print "PROJECT: %s" % project - # print "METRIC: %s" % metric - # print "DATASET: %s" % repo.dataset - - if repo.dataset[metric]['oldest'] > dt: - repo.dataset[metric]['oldest'] = dt - repo.dataset[metric]['total'] +=1 - if is_wikimedian(email, settings.whitelist) == True: - repo.dataset[metric]['wikimedian'] +=1 - repo.email['wikimedian'].add(email) - else: - repo.dataset[metric]['volunteer'] +=1 - repo.email['volunteer'].add(email) - repo.touched = True - - -def main(): - gerrit = Gerrit() - settings = Settings(gerrit) - print 'Fetching list of all gerrit repositories...' - repos = create_repo_set(gerrit, settings) - - for metric in settings.metrics.itervalues(): - #query = 'ssh -p %s %s gerrit query --format=%s %s' % (gerrit.port, gerrit.host, gerrit.format, question) - output = run_gerrit_query(metric.query) - print 'Running %s' % metric.query - construct_dataset(settings, repos, metric.name, output, gerrit) - - print 'Fixing miscategorization of volunteer engineers...' - repos = cleanup_volunteers(repos, settings.whitelist) - print 'Creating datasets...' - create_dataset(repos, gerrit) - - -if __name__== '__main__': - main() \ No newline at end of file -- 1.7.0.4