committer_parser.py 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170
  1. #!/usr/bin/python
  2. # -*- coding: utf-8 -*-
  3. #
  4. # committer_parser.py
  5. #
  6. # Simple script to parse the output of 'git log' and generate some statistics.
  7. # May leverage GitHub API in the future
  8. #
  9. """
  10. To use this commit parser script pipe git log into the stdin:
  11. git log | committer_parser.py -c -
  12. """
  13. # pylint: disable=resource-leakage
  14. # Import python libs
  15. from __future__ import absolute_import, print_function
  16. import datetime
  17. import email.utils
  18. import getopt
  19. import re
  20. import sys
  21. class Usage(Exception):
  22. def __init__(self, msg): # pylint: disable=W0231
  23. self.msg = (
  24. "committer_parser.py [-c | --contributor-detail] - |" " <logfilename>\n"
  25. )
  26. self.msg += (
  27. " : Parse commit log from git and print number of "
  28. "commits and unique committers\n"
  29. )
  30. self.msg += " : by month. Accepts a filename or reads from stdin.\n"
  31. self.msg += (
  32. " : -c | --contributor-detail generates output by "
  33. "contributor, by month, in a tab-separated table\n"
  34. )
  35. if msg:
  36. self.msg += "\n"
  37. self.msg += msg
  38. def parse_date(datestr):
  39. d = email.utils.parsedate(datestr)
  40. return datetime.datetime(d[0], d[1], d[2], d[3], d[4], d[5], d[6])
  41. def parse_gitlog(filename=None):
  42. """
  43. Parse out the gitlog cli data
  44. """
  45. results = {}
  46. commits = {}
  47. commits_by_contributor = {}
  48. if not filename or filename == "-":
  49. fh = sys.stdin
  50. else:
  51. fh = open(filename, "r+")
  52. try:
  53. commitcount = 0
  54. for line in fh.readlines():
  55. line = line.rstrip()
  56. if line.startswith("commit "):
  57. new_commit = True
  58. commitcount += 1
  59. continue
  60. if line.startswith("Author:"):
  61. author = re.match(r"Author:\s+(.*)\s+<(.*)>", line)
  62. if author:
  63. email = author.group(2)
  64. continue
  65. if line.startswith("Date:"):
  66. isodate = re.match(r"Date:\s+(.*)", line)
  67. d = parse_date(isodate.group(1))
  68. continue
  69. if len(line) < 2 and new_commit:
  70. new_commit = False
  71. key = "{0}-{1}".format(d.year, str(d.month).zfill(2))
  72. if key not in results:
  73. results[key] = []
  74. if key not in commits:
  75. commits[key] = 0
  76. if email not in commits_by_contributor:
  77. commits_by_contributor[email] = {}
  78. if key not in commits_by_contributor[email]:
  79. commits_by_contributor[email][key] = 1
  80. else:
  81. commits_by_contributor[email][key] += 1
  82. if email not in results[key]:
  83. results[key].append(email)
  84. commits[key] += commitcount
  85. commitcount = 0
  86. finally:
  87. fh.close()
  88. return (results, commits, commits_by_contributor)
  89. def counts_by_contributor(commits_by_contributor, results):
  90. output = ""
  91. dates = sorted(results.keys())
  92. for d in dates:
  93. output += "\t{0}".format(d)
  94. output += "\n"
  95. for email in sorted(commits_by_contributor.keys()):
  96. output += "'{0}".format(email)
  97. for d in dates:
  98. if d in commits_by_contributor[email]:
  99. output += "\t{0}".format(commits_by_contributor[email][d])
  100. else:
  101. output += "\t"
  102. output += "\n"
  103. return output
  104. def count_results(results, commits):
  105. result_str = ""
  106. print("Date\tContributors\tCommits")
  107. for k in sorted(results.keys()):
  108. result_str += "{0}\t{1}\t{2}".format(k, len(results[k]), commits[k])
  109. result_str += "\n"
  110. return result_str
  111. def main(argv=None):
  112. if argv is None:
  113. argv = sys.argv
  114. try:
  115. try:
  116. opts, args = getopt.getopt(argv[1:], "hc", ["help", "contributor-detail"])
  117. if len(args) < 1:
  118. raise Usage(
  119. "committer_parser.py needs a filename or '-' to read from stdin"
  120. )
  121. except getopt.error as msg:
  122. raise Usage(msg)
  123. except Usage as err:
  124. print(err.msg, file=sys.stderr)
  125. return 2
  126. if len(opts) > 0:
  127. if "-h" in opts[0] or "--help" in opts[0]:
  128. return 0
  129. data, counts, commits_by_contributor = parse_gitlog(filename=args[0])
  130. if len(opts) > 0:
  131. if "-c" or "--contributor-detail":
  132. print(counts_by_contributor(commits_by_contributor, data))
  133. else:
  134. print(count_results(data, counts))
  135. if __name__ == "__main__":
  136. sys.exit(main())