#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# committer_parser.py
#
# Simple script to parse the output of 'git log' and generate some statistics.
# May leverage GitHub API in the future
#
'''
To use this commit parser script pipe git log into the stdin:
git log | committer_parser.py -c -
'''
# pylint: disable=resource-leakage
# Import python libs
  15. from __future__ import absolute_import
  16. from __future__ import print_function
  17. import sys
  18. import getopt
  19. import re
  20. import email.utils
  21. import datetime
  22. class Usage(Exception):
  23. def __init__(self, msg): # pylint: disable=W0231
  24. self.msg = 'committer_parser.py [-c | --contributor-detail] - |' \
  25. ' <logfilename>\n'
  26. self.msg += ' : Parse commit log from git and print number of ' \
  27. 'commits and unique committers\n'
  28. self.msg += ' : by month. Accepts a filename or reads from stdin.\n'
  29. self.msg += ' : -c | --contributor-detail generates output by ' \
  30. 'contributor, by month, in a tab-separated table\n'
  31. if msg:
  32. self.msg += '\n'
  33. self.msg += msg
  34. def parse_date(datestr):
  35. d = email.utils.parsedate(datestr)
  36. return datetime.datetime(d[0], d[1], d[2], d[3], d[4], d[5], d[6])
  37. def parse_gitlog(filename=None):
  38. '''
  39. Parse out the gitlog cli data
  40. '''
  41. results = {}
  42. commits = {}
  43. commits_by_contributor = {}
  44. if not filename or filename == '-':
  45. fh = sys.stdin
  46. else:
  47. fh = open(filename, 'r+')
  48. try:
  49. commitcount = 0
  50. for line in fh.readlines():
  51. line = line.rstrip()
  52. if line.startswith('commit '):
  53. new_commit = True
  54. commitcount += 1
  55. continue
  56. if line.startswith('Author:'):
  57. author = re.match(r'Author:\s+(.*)\s+<(.*)>', line)
  58. if author:
  59. email = author.group(2)
  60. continue
  61. if line.startswith('Date:'):
  62. isodate = re.match(r'Date:\s+(.*)', line)
  63. d = parse_date(isodate.group(1))
  64. continue
  65. if len(line) < 2 and new_commit:
  66. new_commit = False
  67. key = '{0}-{1}'.format(d.year, str(d.month).zfill(2))
  68. if key not in results:
  69. results[key] = []
  70. if key not in commits:
  71. commits[key] = 0
  72. if email not in commits_by_contributor:
  73. commits_by_contributor[email] = {}
  74. if key not in commits_by_contributor[email]:
  75. commits_by_contributor[email][key] = 1
  76. else:
  77. commits_by_contributor[email][key] += 1
  78. if email not in results[key]:
  79. results[key].append(email)
  80. commits[key] += commitcount
  81. commitcount = 0
  82. finally:
  83. fh.close()
  84. return (results, commits, commits_by_contributor)
  85. def counts_by_contributor(commits_by_contributor, results):
  86. output = ''
  87. dates = sorted(results.keys())
  88. for d in dates:
  89. output += '\t{0}'.format(d)
  90. output += '\n'
  91. for email in sorted(commits_by_contributor.keys()):
  92. output += '\'{0}'.format(email)
  93. for d in dates:
  94. if d in commits_by_contributor[email]:
  95. output += '\t{0}'.format(commits_by_contributor[email][d])
  96. else:
  97. output += '\t'
  98. output += '\n'
  99. return output
  100. def count_results(results, commits):
  101. result_str = ''
  102. print('Date\tContributors\tCommits')
  103. for k in sorted(results.keys()):
  104. result_str += '{0}\t{1}\t{2}'.format(k, len(results[k]), commits[k])
  105. result_str += '\n'
  106. return result_str
  107. def main(argv=None):
  108. if argv is None:
  109. argv = sys.argv
  110. try:
  111. try:
  112. opts, args = getopt.getopt(argv[1:], 'hc', ['help', 'contributor-detail'])
  113. if len(args) < 1:
  114. raise Usage('committer_parser.py needs a filename or \'-\' to read from stdin')
  115. except getopt.error as msg:
  116. raise Usage(msg)
  117. except Usage as err:
  118. print(err.msg, file=sys.stderr)
  119. return 2
  120. if len(opts) > 0:
  121. if '-h' in opts[0] or '--help' in opts[0]:
  122. return 0
  123. data, counts, commits_by_contributor = parse_gitlog(filename=args[0])
  124. if len(opts) > 0:
  125. if '-c' or '--contributor-detail':
  126. print(counts_by_contributor(commits_by_contributor, data))
  127. else:
  128. print(count_results(data, counts))
  129. if __name__ == "__main__":
  130. sys.exit(main())