Skip to content

Commit fe5b293

Browse files
committed
elections/tools: Use CLI arguments
Updates to mean the user doesn't need to edit code to customise the script, but instead: - Read the github token from an env - Accept the end and (optional) start dates to use in the commit scans Update the python file to have functions and a `main` method Signed-off-by: stevenhorsman <[email protected]>
1 parent 858388f commit fe5b293

File tree

2 files changed

+112
-101
lines changed

2 files changed

+112
-101
lines changed

elections/tools/README.md

Lines changed: 7 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -22,29 +22,19 @@ $ python3 -m venv .venv
2222
$ .venv/bin/pip install pytz github3.py pyyaml
2323
```
2424

25-
Before running the tool you will need to create a
25+
Before running the tool you will need to create a personal
2626
[GitHub API token](https://github.blog/2013-05-16-personal-api-tokens/)
27+
and set it as an environment variable called `GH_TOKEN`.
2728

28-
replace `__API_TOKEN__` in the script with your personal token.
29-
30-
Also update the election start and end times to cover the period being
31-
examined for this election period. The lines to edit look like:
32-
33-
```python
34-
start_time = datetime.datetime(2018, 1, 1, 0, 0, 0, tzinfo=pytz.UTC)
35-
end_time = datetime.datetime(2018, 8, 1, 0, 0, 0, tzinfo=pytz.UTC)
36-
```
37-
38-
Then run the tool with:
29+
Then run the tool supplying `-end <date of candidate nomination in %d/%m/%y format>`.
30+
e.g. if the nomination period began on 7th April 2025:
3931

4032
```bash
41-
$ .venv/bin/python ./generate_electorate.py
33+
$ .venv/bin/python ./generate_electorate.py -end 07/04/25
4234
```
4335

44-
The code looks at all commits in all Kata Containers repos *except*
45-
`kata-containers/linux` and `kata-containers/qemu`. As both of these are forks
46-
(in the GitHub sense) they'll have lots of contributors that may not be Kata
47-
contributors.
36+
The code looks at all commits in all the active Kata Containers repos. A number of
37+
archived or forked repos are ignored, both to save time and to avoid including non-Kata contributors.
4838

4939
For contributors that have more than one email address it picks one as default
5040
but supplies all the others so we can be smarter about where to send the

elections/tools/generate_electorate.py

Lines changed: 105 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,10 @@
99
# Description: Generate a list of kata contributors by extracting contact
1010
# information from GitHub
1111

12+
import argparse
1213
import datetime
13-
import pytz
14+
from datetime import timedelta
15+
import os
1416
import re
1517
import yaml
1618

@@ -68,23 +70,22 @@ def _author_representer(dumper, data):
6870
commit_count=data.commit_count)
6971
return dumper.represent_dict(o_dict.items())
7072

71-
72-
dco_re = re.compile('signed.off.by[: ]*(?P<name>[^<]*)<(?P<email>.*)>$',
73-
re.IGNORECASE | re.MULTILINE)
74-
# Get a token GitHub Personal API token see:
75-
# https://blog.github.com/2013-05-16-personal-api-tokens/
76-
# for more information.
77-
gh = login(token='__API_TOKEN__')
78-
org = gh.organization('kata-containers')
79-
# Example dates for testing.
80-
start_time = datetime.datetime(2018, 1, 1, 0, 0, 0, tzinfo=pytz.UTC)
81-
end_time = datetime.datetime(2018, 8, 1, 0, 0, 0, tzinfo=pytz.UTC)
82-
# ... Or run just include all commits
83-
# start_time = end_time = None
84-
# All commits
85-
number = -1
86-
projects = []
87-
ignored_repos = [
73+
def find_authors_by_project(start_time, end_time):
74+
dco_re = re.compile('signed.off.by[: ]*(?P<name>[^<]*)<(?P<email>.*)>$',
75+
re.IGNORECASE | re.MULTILINE)
76+
# Get a token GitHub Personal API token see:
77+
# https://blog.github.com/2013-05-16-personal-api-tokens/
78+
# for more information.
79+
try:
80+
personal_token=os.environ['GH_TOKEN']
81+
except KeyError:
82+
raise Exception("GH_TOKEN environment variable was not set")
83+
84+
gh = login(token=personal_token)
85+
org = gh.organization('kata-containers')
86+
number = -1
87+
projects = []
88+
ignored_repos = [
8889
'agent',
8990
'ci',
9091
'dbs-snapshot',
@@ -105,77 +106,97 @@ def _author_representer(dumper, data):
105106
'shim',
106107
'slash-command-action',
107108
'tests',
108-
]
109-
109+
]
110110

111-
author_cache = {}
112-
for repo in org.repositories():
113-
# Skip these repos as they are not a core part of the project, and are
114-
# forked/imported/archived so contain many contributors from outside the project.
115-
# Also skip the github security advisory repos for quicker processing
116-
if str(repo).split("/")[1] in ignored_repos or str(repo).split("/")[1].startswith('kata-containers-ghsa'):
117-
print('Skipping repo %s' % (repo))
118-
continue
119-
print('Looking for changes in %s between %s and %s' %
120-
(repo, start_time, end_time))
121111

122-
authors = AuthorSet()
123-
for commit in repo.commits(since=start_time, until=end_time, number=number):
124-
125-
# If a commit has >1 parents then it's a merge commit, so skip these
126-
if len(commit.parents) > 1:
112+
author_cache = {}
113+
for repo in org.repositories():
114+
# Skip these repos as they are not a core part of the project, and are
115+
# forked/imported/archived so contain many contributors from outside the project.
116+
# Also skip the github security advisory repos for quicker processing
117+
if str(repo).split("/")[1] in ignored_repos or str(repo).split("/")[1].startswith('kata-containers-ghsa'):
118+
print('Skipping repo %s' % (repo))
127119
continue
120+
print('Looking for changes in %s between %s and %s' %
121+
(repo, start_time, end_time))
128122

129-
if commit.author is None:
130-
if commit.commit.author is None:
131-
print('Skipping %s in %s as it has no author. Did this merge via GitHub?' %
132-
(commit, repo))
123+
authors = AuthorSet()
124+
for commit in repo.commits(since=start_time, until=end_time, number=number):
125+
# If a commit has >1 parents then it's a merge commit, so skip these
126+
if len(commit.parents) > 1:
133127
continue
134128

135-
author_id = commit.commit.author.get('email')
136-
print('%s in %s as has no author. Using email (%s) as the author id' %
137-
(commit, repo, author_id))
138-
else:
139-
author_id = commit.author.login
140-
141-
if author_id not in author_cache:
142129
if commit.author is None:
143-
author = Author(author_id, email=author_id,
144-
name=commit.commit.author.get('name'))
130+
if commit.commit.author is None:
131+
print('Skipping %s in %s as it has no author. Did this merge via GitHub?' %
132+
(commit, repo))
133+
continue
134+
135+
author_id = commit.commit.author.get('email')
136+
print('%s in %s as has no author. Using email (%s) as the author id' %
137+
(commit, repo, author_id))
145138
else:
146-
_author = gh.user(commit.author.login)
147-
author = Author(_author.login, email=_author.email,
148-
name=_author.name)
149-
150-
author_cache[author_id] = author
151-
152-
author = author_cache[author_id]
153-
author.commit_count += 1
154-
155-
# If the GitHub account doesn't have a name or email address
156-
# the author *may* have included it in their git config.
157-
if author.email is None and commit.commit.author.get('email'):
158-
author.email = commit.commit.author.get('email')
159-
if author.name is None and commit.commit.author.get('name'):
160-
author.name = commit.commit.author.get('name')
161-
162-
# last ditch effort did the author use a valid email address in the
163-
# DCO line?
164-
match = dco_re.search(commit.message)
165-
if match:
166-
if ((author.email is None or
167-
'users.noreply.github.com' in author.email) and
168-
match.group('email')):
169-
author.email = match.group('email')
170-
if author.name is None and match.group('name'):
171-
author.name = match.group('name')
172-
authors.add(author)
173-
projects.append({str(repo): authors})
174-
175-
# Dark YAML voodoo
176-
yaml.Dumper.ignore_aliases = lambda *args: True
177-
yaml.Dumper.add_representer(AuthorSet, _authorset_representer)
178-
yaml.Dumper.add_representer(Author, _author_representer)
179-
with open('electorate.yaml', 'w') as f:
180-
yaml.dump(projects, f, default_flow_style=False, default_style='',
181-
explicit_start=True)
139+
author_id = commit.author.login
140+
141+
if author_id not in author_cache:
142+
if commit.author is None:
143+
author = Author(author_id, email=author_id,
144+
name=commit.commit.author.get('name'))
145+
else:
146+
_author = gh.user(commit.author.login)
147+
author = Author(_author.login, email=_author.email,
148+
name=_author.name)
149+
150+
author_cache[author_id] = author
151+
152+
author = author_cache[author_id]
153+
author.commit_count += 1
154+
155+
# If the GitHub account doesn't have a name or email address
156+
# the author *may* have included it in their git config.
157+
if author.email is None and commit.commit.author.get('email'):
158+
author.email = commit.commit.author.get('email')
159+
if author.name is None and commit.commit.author.get('name'):
160+
author.name = commit.commit.author.get('name')
161+
162+
# last ditch effort did the author use a valid email address in the
163+
# DCO line?
164+
match = dco_re.search(commit.message)
165+
if match:
166+
if ((author.email is None or
167+
'users.noreply.github.com' in author.email) and
168+
match.group('email')):
169+
author.email = match.group('email')
170+
if author.name is None and match.group('name'):
171+
author.name = match.group('name')
172+
authors.add(author)
173+
projects.append({str(repo): authors})
174+
return projects
175+
176+
def main():
    """Parse the command line, collect contributors and write electorate.yaml.

    Command-line options:
      -end    (required) end date of the period to examine, in %d/%m/%y format.
      -start  (optional) start date of the period, in the same format. When
              omitted it defaults to 365 days before the end date.

    The per-project author data returned by find_authors_by_project() is
    dumped as YAML to 'electorate.yaml' in the current working directory.

    Malformed dates, or a start date that is not before the end date, are
    reported through argparse as a usage error (exit status 2).
    """
    date_format = '%d/%m/%y'

    def parse_date(value):
        # Surface a malformed date as a normal argparse usage error instead of
        # an uncaught ValueError traceback.
        try:
            return datetime.datetime.strptime(value, date_format)
        except ValueError:
            raise argparse.ArgumentTypeError(
                'invalid date %r, expected format %s' % (value, date_format)
            ) from None

    parser = argparse.ArgumentParser(description='An electorate generation script')
    # '%' must be doubled in help strings because argparse %-formats them.
    parser.add_argument(
        "-end", required=True, type=parse_date,
        help='the end date of the period to examine in format %%d/%%m/%%y.')
    parser.add_argument(
        "-start", type=parse_date,
        help='the start date of the period to examine in format %%d/%%m/%%y. '
             'If not set will default to 365 days before the end time')

    args = parser.parse_args()
    end_time = args.end
    # NOTE(review): these datetimes are timezone-naive; confirm how the GitHub
    # client interprets them (presumably as UTC).
    if args.start is not None:
        start_time = args.start
    else:
        start_time = end_time - timedelta(days=365)

    # An empty or inverted window would silently produce an empty electorate.
    if start_time >= end_time:
        parser.error('the start date (%s) must be before the end date (%s)' %
                     (start_time, end_time))

    print("Getting committers from", start_time, " -> ", end_time)

    projects = find_authors_by_project(start_time, end_time)

    # Dark YAML voodoo: never emit anchors/aliases, and teach the dumper how to
    # serialise the project's AuthorSet / Author objects.
    yaml.Dumper.ignore_aliases = lambda *_args: True
    yaml.Dumper.add_representer(AuthorSet, _authorset_representer)
    yaml.Dumper.add_representer(Author, _author_representer)
    with open('electorate.yaml', 'w') as f:
        yaml.dump(projects, f, default_flow_style=False, default_style='',
                  explicit_start=True)
200+
201+
if __name__ == '__main__':
202+
main()

0 commit comments

Comments
 (0)