123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170 |
- #!/usr/bin/python
- # -*- coding: utf-8 -*-
- #
- # committer_parser.py
- #
- # Simple script to parse the output of 'git log' and generate some statistics.
- # May leverage GitHub API in the future
- #
- """
- To use this commit parser script pipe git log into the stdin:
- git log | committer_parser.py -c -
- """
- # pylint: disable=resource-leakage
- # Import python libs
- from __future__ import absolute_import, print_function
- import datetime
- import email.utils
- import getopt
- import re
- import sys
class Usage(Exception):
    """
    Command-line usage error.

    The ``msg`` attribute always holds the usage banner for the script.  If a
    non-empty *msg* is supplied it is appended to the banner after a blank
    line, so ``err.msg`` can be printed directly.

    :param msg: Extra detail to append. May be a string or any object whose
        ``str()`` is meaningful (for example a ``getopt.GetoptError``). Falsy
        values produce the bare banner.
    """

    def __init__(self, msg):
        self.msg = (
            "committer_parser.py [-c | --contributor-detail] - |" " <logfilename>\n"
        )
        self.msg += (
            " : Parse commit log from git and print number of "
            "commits and unique committers\n"
        )
        self.msg += " : by month. Accepts a filename or reads from stdin.\n"
        self.msg += (
            " : -c | --contributor-detail generates output by "
            "contributor, by month, in a tab-separated table\n"
        )
        if msg:
            # Callers hand over exception objects as well as plain strings;
            # concatenating a non-string onto self.msg would raise TypeError.
            detail = str(msg)
            if detail:
                self.msg += "\n"
                self.msg += detail
        # Initialise the base class so str(err) and err.args carry the text.
        super(Usage, self).__init__(self.msg)
def parse_date(datestr):
    """
    Convert a date string, as printed on a ``Date:`` line of ``git log``,
    into a ``datetime.datetime``.

    Parsing is delegated to ``email.utils.parsedate``; only the year, month,
    day, hour, minute and second fields of its result are used, so the
    returned datetime is naive and has zero microseconds.

    :param datestr: The date text to parse.
    :returns: A naive ``datetime.datetime``.
    :raises ValueError: If ``email.utils.parsedate`` cannot parse *datestr*.
    """
    parsed = email.utils.parsedate(datestr)
    if parsed is None:
        # parsedate signals failure by returning None rather than raising.
        raise ValueError("unable to parse date: {0!r}".format(datestr))
    # Fields beyond the first six are not date/time components and must not
    # be forwarded to the datetime constructor.
    return datetime.datetime(*parsed[:6])
def parse_gitlog(filename=None):
    """
    Parse the text output of ``git log`` and tally commits per month.

    A commit is recorded when the first (near-)empty line following its
    ``commit`` header is reached, using the most recent ``Author:`` email and
    ``Date:`` seen for that commit.  Commits whose author email or date could
    not be extracted are skipped rather than attributed to stale values from
    an earlier commit.

    :param filename: Path of a file holding the log. ``None``, an empty
        string or ``"-"`` means read from ``sys.stdin``.
    :returns: A 3-tuple ``(results, commits, commits_by_contributor)`` where

        * ``results`` maps ``"YYYY-MM"`` to the list of distinct author
          emails seen in that month,
        * ``commits`` maps ``"YYYY-MM"`` to the number of commits counted,
        * ``commits_by_contributor`` maps an email to a dict of
          ``"YYYY-MM"`` -> commit count.
    """
    results = {}
    commits = {}
    commits_by_contributor = {}

    read_stdin = not filename or filename == "-"
    # Read-only access is all that is needed; "r+" would fail on files the
    # user cannot write to.
    fh = sys.stdin if read_stdin else open(filename, "r")
    try:
        commitcount = 0
        new_commit = False
        author_email = None
        commit_date = None
        # Iterate lazily instead of loading the whole log with readlines().
        for raw_line in fh:
            line = raw_line.rstrip()

            if line.startswith("commit "):
                new_commit = True
                commitcount += 1
                # Forget the previous commit's header values so they cannot
                # leak into this commit if its own header is unparseable.
                author_email = None
                commit_date = None
                continue

            if line.startswith("Author:"):
                author = re.match(r"Author:\s+(.*)\s+<(.*)>", line)
                if author:
                    author_email = author.group(2)
                continue

            if line.startswith("Date:"):
                isodate = re.match(r"Date:\s+(.*)", line)
                if isodate:
                    commit_date = parse_date(isodate.group(1))
                continue

            if len(line) < 2 and new_commit:
                # End of the header block for the current commit.
                new_commit = False
                if author_email is None or commit_date is None:
                    # Incomplete header: nothing to bucket this commit under.
                    commitcount = 0
                    continue

                key = "{0}-{1}".format(
                    commit_date.year, str(commit_date.month).zfill(2)
                )

                contributors = results.setdefault(key, [])
                if author_email not in contributors:
                    contributors.append(author_email)

                per_month = commits_by_contributor.setdefault(author_email, {})
                per_month[key] = per_month.get(key, 0) + 1

                commits[key] = commits.get(key, 0) + commitcount
                commitcount = 0
    finally:
        # Only close what this function opened; leave sys.stdin alone.
        if not read_stdin:
            fh.close()

    return (results, commits, commits_by_contributor)
def counts_by_contributor(commits_by_contributor, results):
    """
    Render a tab-separated table of commit counts per contributor per month.

    The first line is a header with one tab-prefixed column per key of
    *results*, in sorted order.  Each following line starts with a single
    quote and the contributor email, then one tab-prefixed cell per month;
    a cell is empty when the contributor has no entry for that month.

    :param commits_by_contributor: Mapping of email -> {month: count}.
    :param results: Mapping whose keys are the month labels to show.
    :returns: The table as one string; every line ends with a newline.
    """
    months = sorted(results)
    header = "".join("\t{0}".format(month) for month in months)
    rows = [header + "\n"]

    for contributor in sorted(commits_by_contributor):
        per_month = commits_by_contributor[contributor]
        cells = [
            "\t{0}".format(per_month[month]) if month in per_month else "\t"
            for month in months
        ]
        rows.append("'{0}".format(contributor) + "".join(cells) + "\n")

    return "".join(rows)
def count_results(results, commits):
    """
    Build the per-month summary rows of contributors and commits.

    The column header is printed to stdout as a side effect; it is not part
    of the returned string.

    :param results: Mapping of month label -> collection of contributors.
    :param commits: Mapping of month label -> commit count.
    :returns: One ``month<TAB>contributors<TAB>commits`` line per month, in
        sorted month order, each terminated by a newline.
    """
    print("Date\tContributors\tCommits")
    rows = [
        "{0}\t{1}\t{2}".format(month, len(results[month]), commits[month]) + "\n"
        for month in sorted(results)
    ]
    return "".join(rows)
def main(argv=None):
    """
    Command-line entry point.

    Options: ``-h``/``--help`` prints the usage text and exits successfully;
    ``-c``/``--contributor-detail`` prints the per-contributor table instead
    of the per-month summary.  One positional argument is required: a log
    file name, or ``-`` to read from stdin.

    :param argv: Full argument vector including the program name; defaults
        to ``sys.argv``.
    :returns: ``0`` on success, ``2`` on a usage error (the usage text is
        written to stderr in that case).
    """
    if argv is None:
        argv = sys.argv

    try:
        try:
            opts, args = getopt.getopt(argv[1:], "hc", ["help", "contributor-detail"])
        except getopt.error as msg:
            # Pass text, not the exception object, so the message can be
            # appended to the usage banner.
            raise Usage(str(msg))

        # Look at every supplied option, not just the first one.
        flags = set(name for name, _ in opts)

        # Help is honoured before the filename check so "-h" alone works.
        if flags & {"-h", "--help"}:
            print(Usage("").msg)
            return 0

        if len(args) < 1:
            raise Usage(
                "committer_parser.py needs a filename or '-' to read from stdin"
            )
    except Usage as err:
        print(err.msg, file=sys.stderr)
        return 2

    data, counts, commits_by_contributor = parse_gitlog(filename=args[0])

    if flags & {"-c", "--contributor-detail"}:
        print(counts_by_contributor(commits_by_contributor, data))
    else:
        print(count_results(data, counts))
    return 0
# Run the CLI only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|