99# Description: Generate a list of kata contributors by extracting contact
1010# information from GitHub
1111
12+ import argparse
1213import datetime
13- import pytz
14+ from datetime import timedelta
15+ import os
1416import re
1517import yaml
1618
@@ -68,23 +70,22 @@ def _author_representer(dumper, data):
6870 commit_count = data .commit_count )
6971 return dumper .represent_dict (o_dict .items ())
7072
71-
72- dco_re = re .compile ('signed.off.by[: ]*(?P<name>[^<]*)<(?P<email>.*)>$' ,
73- re .IGNORECASE | re .MULTILINE )
74- # Get a token GitHub Personal API token see:
75- # https://blog.github.com/2013-05-16-personal-api-tokens/
76- # for more information.
77- gh = login (token = '__API_TOKEN__' )
78- org = gh .organization ('kata-containers' )
79- # Example dates for testing.
80- start_time = datetime .datetime (2018 , 1 , 1 , 0 , 0 , 0 , tzinfo = pytz .UTC )
81- end_time = datetime .datetime (2018 , 8 , 1 , 0 , 0 , 0 , tzinfo = pytz .UTC )
82- # ... Or run just include all commits
83- # start_time = end_time = None
84- # All commits
85- number = - 1
86- projects = []
87- ignored_repos = [
73+ def find_authors_by_project (start_time , end_time ):
74+ dco_re = re .compile ('signed.off.by[: ]*(?P<name>[^<]*)<(?P<email>.*)>$' ,
75+ re .IGNORECASE | re .MULTILINE )
76+ # Get a GitHub personal API token; see:
77+ # https://blog.github.com/2013-05-16-personal-api-tokens/
78+ # for more information.
79+ try :
80+ personal_token = os .environ ['GH_TOKEN' ]
81+ except KeyError :
82+ raise Exception ("GH_TOKEN environment variable was not set" )
83+
84+ gh = login (token = personal_token )
85+ org = gh .organization ('kata-containers' )
86+ number = - 1
87+ projects = []
88+ ignored_repos = [
8889 'agent' ,
8990 'ci' ,
9091 'dbs-snapshot' ,
@@ -105,77 +106,97 @@ def _author_representer(dumper, data):
105106 'shim' ,
106107 'slash-command-action' ,
107108 'tests' ,
108- ]
109-
109+ ]
110110
111- author_cache = {}
112- for repo in org .repositories ():
113- # Skip these repos as they are not a core part of the project, and are
114- # forked/imported/archived so contain many contributors from outside the project.
115- # Also skip the github security advisory repos for quicker processing
116- if str (repo ).split ("/" )[1 ] in ignored_repos or str (repo ).split ("/" )[1 ].startswith ('kata-containers-ghsa' ):
117- print ('Skipping repo %s' % (repo ))
118- continue
119- print ('Looking for changes in %s between %s and %s' %
120- (repo , start_time , end_time ))
121111
122- authors = AuthorSet ()
123- for commit in repo .commits (since = start_time , until = end_time , number = number ):
124-
125- # If a commit has >1 parents then it's a merge commit, so skip these
126- if len (commit .parents ) > 1 :
112+ author_cache = {}
113+ for repo in org .repositories ():
114+ # Skip these repos as they are not a core part of the project, and are
115+ # forked/imported/archived so contain many contributors from outside the project.
116+ # Also skip the github security advisory repos for quicker processing
117+ if str (repo ).split ("/" )[1 ] in ignored_repos or str (repo ).split ("/" )[1 ].startswith ('kata-containers-ghsa' ):
118+ print ('Skipping repo %s' % (repo ))
127119 continue
120+ print ('Looking for changes in %s between %s and %s' %
121+ (repo , start_time , end_time ))
128122
129- if commit . author is None :
130- if commit . commit . author is None :
131- print ( 'Skipping %s in %s as it has no author. Did this merge via GitHub?' %
132- (commit , repo ))
123+ authors = AuthorSet ()
124+ for commit in repo . commits ( since = start_time , until = end_time , number = number ) :
125+ # If a commit has >1 parents then it's a merge commit, so skip these
126+ if len (commit . parents ) > 1 :
133127 continue
134128
135- author_id = commit .commit .author .get ('email' )
136- print ('%s in %s as has no author. Using email (%s) as the author id' %
137- (commit , repo , author_id ))
138- else :
139- author_id = commit .author .login
140-
141- if author_id not in author_cache :
142129 if commit .author is None :
143- author = Author (author_id , email = author_id ,
144- name = commit .commit .author .get ('name' ))
130+ if commit .commit .author is None :
131+ print ('Skipping %s in %s as it has no author. Did this merge via GitHub?' %
132+ (commit , repo ))
133+ continue
134+
135+ author_id = commit .commit .author .get ('email' )
136+ print ('%s in %s as has no author. Using email (%s) as the author id' %
137+ (commit , repo , author_id ))
145138 else :
146- _author = gh .user (commit .author .login )
147- author = Author (_author .login , email = _author .email ,
148- name = _author .name )
149-
150- author_cache [author_id ] = author
151-
152- author = author_cache [author_id ]
153- author .commit_count += 1
154-
155- # If the GitHub account doesn't have a name or email address
156- # the author *may* have included it in their git config.
157- if author .email is None and commit .commit .author .get ('email' ):
158- author .email = commit .commit .author .get ('email' )
159- if author .name is None and commit .commit .author .get ('name' ):
160- author .name = commit .commit .author .get ('name' )
161-
162- # last ditch effort did the author use a valid email address in the
163- # DCO line?
164- match = dco_re .search (commit .message )
165- if match :
166- if ((author .email is None or
167- 'users.noreply.github.com' in author .email ) and
168- match .group ('email' )):
169- author .email = match .group ('email' )
170- if author .name is None and match .group ('name' ):
171- author .name = match .group ('name' )
172- authors .add (author )
173- projects .append ({str (repo ): authors })
174-
175- # Dark YAML voodoo
176- yaml .Dumper .ignore_aliases = lambda * args : True
177- yaml .Dumper .add_representer (AuthorSet , _authorset_representer )
178- yaml .Dumper .add_representer (Author , _author_representer )
179- with open ('electorate.yaml' , 'w' ) as f :
180- yaml .dump (projects , f , default_flow_style = False , default_style = '' ,
181- explicit_start = True )
139+ author_id = commit .author .login
140+
141+ if author_id not in author_cache :
142+ if commit .author is None :
143+ author = Author (author_id , email = author_id ,
144+ name = commit .commit .author .get ('name' ))
145+ else :
146+ _author = gh .user (commit .author .login )
147+ author = Author (_author .login , email = _author .email ,
148+ name = _author .name )
149+
150+ author_cache [author_id ] = author
151+
152+ author = author_cache [author_id ]
153+ author .commit_count += 1
154+
155+ # If the GitHub account doesn't have a name or email address
156+ # the author *may* have included it in their git config.
157+ if author .email is None and commit .commit .author .get ('email' ):
158+ author .email = commit .commit .author .get ('email' )
159+ if author .name is None and commit .commit .author .get ('name' ):
160+ author .name = commit .commit .author .get ('name' )
161+
162+ # Last-ditch effort: did the author use a valid email address in the
163+ # DCO line?
164+ match = dco_re .search (commit .message )
165+ if match :
166+ if ((author .email is None or
167+ 'users.noreply.github.com' in author .email ) and
168+ match .group ('email' )):
169+ author .email = match .group ('email' )
170+ if author .name is None and match .group ('name' ):
171+ author .name = match .group ('name' )
172+ authors .add (author )
173+ projects .append ({str (repo ): authors })
174+ return projects
175+
# Date format accepted by the -start / -end command-line options.
_DATE_FORMAT = '%d/%m/%y'


def _parse_date(value):
    """Convert a command-line date string into a ``datetime.datetime``.

    Used as an argparse ``type=`` converter so that a malformed date is
    reported as a normal usage error (exit status 2) rather than an
    unhandled ``ValueError`` traceback.

    Args:
        value: the raw option value, expected in ``%d/%m/%y`` format.

    Returns:
        The parsed (naive) ``datetime.datetime``.

    Raises:
        argparse.ArgumentTypeError: if ``value`` does not match the format.
    """
    try:
        return datetime.datetime.strptime(value, _DATE_FORMAT)
    except ValueError:
        raise argparse.ArgumentTypeError(
            'invalid date %r, expected format %s' % (value, _DATE_FORMAT))


def main(argv=None):
    """Generate ``electorate.yaml`` for the period given on the command line.

    Parses ``-end`` (required) and ``-start`` (optional, defaulting to 365
    days before the end date), collects the authors per project with
    ``find_authors_by_project`` and dumps the result as YAML to
    ``electorate.yaml`` in the current directory.

    Args:
        argv: optional list of argument strings. When ``None`` (the
            default) argparse reads ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(
        description='An electorate generation script')
    # argparse %-formats help strings, hence the doubled percent signs.
    parser.add_argument(
        "-end", required=True, type=_parse_date,
        help='the end date of the period to examine in format %%d/%%m/%%y.')
    parser.add_argument(
        "-start", type=_parse_date,
        help='the start date of the period to examine in format '
             '%%d/%%m/%%y. If not set will default to '
             '365 days before the end time')

    args = parser.parse_args(argv)
    end_time = args.end
    if args.start is not None:
        start_time = args.start
    else:
        start_time = end_time - timedelta(days=365)

    print("Getting committers from", start_time, " -> ", end_time)

    projects = find_authors_by_project(start_time, end_time)

    # Dark YAML voodoo: never emit anchors/aliases, and teach the dumper how
    # to serialise the project's AuthorSet / Author objects.
    yaml.Dumper.ignore_aliases = lambda *_args: True
    yaml.Dumper.add_representer(AuthorSet, _authorset_representer)
    yaml.Dumper.add_representer(Author, _author_representer)
    with open('electorate.yaml', 'w') as f:
        yaml.dump(projects, f, default_flow_style=False, default_style='',
                  explicit_start=True)


if __name__ == '__main__':
    main()
0 commit comments