Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 7 additions & 17 deletions elections/tools/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,29 +22,19 @@ $ python3 -m venv .venv
$ .venv/bin/pip install pytz github3.py pyyaml
```

Before running the tool you will need to create a
Before running the tool you will need to create a personal
[GitHub API token](https://github.blog/2013-05-16-personal-api-tokens/)
and set it as an environment variable called `GH_TOKEN`.

replace `__API_TOKEN__` in the script with your personal token.

Also update the election start and end times to cover the period being
examined for this election period. The lines to edit look like:

```python
start_time = datetime.datetime(2018, 1, 1, 0, 0, 0, tzinfo=pytz.UTC)
end_time = datetime.datetime(2018, 8, 1, 0, 0, 0, tzinfo=pytz.UTC)
```

Then run the tool with:
Then run the tool, supplying `-end <date of candidate nomination in %d/%m/%y format>`.
For example, if the nomination period began on 7th April 2025:

```bash
$ .venv/bin/python ./generate_electorate.py
$ .venv/bin/python ./generate_electorate.py -end 07/04/25
```

The code looks at all commits in all Kata Containers repos *except*
`kata-containers/linux` and `kata-containers/qemu`. As both of these are forks
(in the GitHub sense) they'll have lots of contributors that may not be Kata
contributors.
The code looks at all commits in all the active Kata Containers repos. A number of
archived or forked repos are ignored to save time and to avoid including non-Kata contributors.

For contributors that have more than one email address it picks one as default
but supplies all the others so we can be smarter about where to send the
Expand Down
175 changes: 115 additions & 60 deletions elections/tools/generate_electorate.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,17 @@

#
# Copyright (c) 2023 Kata Contributors
# Copyright (c) 2025 IBM Corporation
#
# SPDX-License-Identifier: Apache-2.0
#
# Description: Generate a list of kata contributors by extracting contact
# information from GitHub

import argparse
import datetime
import pytz
from datetime import timedelta
import os
import re
import yaml

Expand Down Expand Up @@ -67,71 +70,96 @@ def _author_representer(dumper, data):
commit_count=data.commit_count)
return dumper.represent_dict(o_dict.items())


dco_re = re.compile('signed.off.by[: ]*(?P<name>[^<]*)<(?P<email>.*)>$',
re.IGNORECASE | re.MULTILINE)
# Get a token GitHub Personal API token see:
# https://blog.github.com/2013-05-16-personal-api-tokens/
# for more information.
gh = login(token='__API_TOKEN__')
org = gh.organization('kata-containers')
# Example dates for testing.
start_time = datetime.datetime(2018, 1, 1, 0, 0, 0, tzinfo=pytz.UTC)
end_time = datetime.datetime(2018, 8, 1, 0, 0, 0, tzinfo=pytz.UTC)
# ... Or run just include all commits
# start_time = end_time = None
# All commits
number = -1
projects = []
ignored_repos = [
'cgroups-rs',
def find_authors_by_project(start_time, end_time):
dco_re = re.compile('signed.off.by[: ]*(?P<name>[^<]*)<(?P<email>.*)>$',
re.IGNORECASE | re.MULTILINE)
# Get a token GitHub Personal API token see:
# https://blog.github.com/2013-05-16-personal-api-tokens/
# for more information.
try:
personal_token=os.environ['GH_TOKEN']
except KeyError:
raise Exception("GH_TOKEN environment variable was not set")

gh = login(token=personal_token)
org = gh.organization('kata-containers')
number = -1
projects = []
ignored_repos = [
'agent',
'ci',
'dbs-snapshot',
'documentation',
'edk2',
'qemu',
'linux',
'project-infra',
'govmm',
'resolve-pr-refs',
'is-organization-member',
'kata-containers-github-actions-tests',
'ksm-throttler',
'linux',
'osbuilder',
'packaging',
'project-infra',
'proxy',
'qemu',
'resolve-pr-refs',
'runtime',
'shim',
'packaging',
'ksm-throttler',
'documentation',
'agent',
'slash-command-action',
'tests-1',
'kata-containers-github-actions-tests',
'kata-containers-cache-kernel',
'kata-containers-2',
'kata-containers-1',
]

author_cache = {}
for repo in org.repositories():
# Skip these repos as they are not a core part of the project, and are
# forked/imported so contain many contributors from outside the project.
if str(repo).split("/")[1] in ignored_repos:
print('Skipping repo %s' % (repo))
continue
print('Looking for changes in %s between %s and %s' %
(repo, start_time, end_time))

authors = AuthorSet()
for branch in repo.branches():
for commit in repo.commits(sha=branch.name, since=start_time, until=end_time,
number=number):
'tests',
]

# Let's build a list of the users we can't get the logins for so
# we can prompt the runner to update the following map
unknown_logins = {}

# Some committers have emails that don't map to their
# userid properly, so maintain a map of these so the data is consistent
email_id_map = {
"[email protected]":"lifupan",
"[email protected]": "Ankita13-code",
"[email protected]": "ChengyuZhu6",
"[email protected]": "lima-emanuel",
"[email protected]": "l8huang",
"[email protected]": "Bickor",
"[email protected]": "huoqifeng",
"[email protected]": "niteeshkd",
"[email protected]": "gpyrros",
"[email protected]": "cmaf",
"[email protected]": "amshinde",
"[email protected]": "seungukshin",
}

author_cache = {}
for repo in org.repositories():
# Skip these repos as they are not a core part of the project, and are
# forked/imported/archived so contain many contributors from outside the project.
# Also skip the github security advisory repos for quicker processing
if str(repo).split("/")[1] in ignored_repos or str(repo).split("/")[1].startswith('kata-containers-ghsa'):
print('Skipping repo %s' % (repo))
continue
print('Looking for changes in %s between %s and %s' %
(repo, start_time, end_time))

authors = AuthorSet()
for commit in repo.commits(since=start_time, until=end_time, number=number):
# If a commit has >1 parents then it's a merge commit, so skip these
if len(commit.parents) > 1:
continue

if commit.author is None:
if commit.commit.author is None:
print('Skipping %s in %s as it has no author. Did this merge via GitHub?' %
(commit, repo))
(commit, repo))
continue

author_id = commit.commit.author.get('email')
print('%s in %s as has no author. Using email (%s) as the author id' %
(commit, repo, author_id))
if author_id in email_id_map:
author_id = email_id_map[author_id]
else:
if not author_id in unknown_logins:
unknown_logins[author_id] = commit.html_url
print('%s in %s as has no author. Using email (%s) as the author id' %
(commit, repo, author_id))
else:
author_id = commit.author.login

Expand Down Expand Up @@ -167,12 +195,39 @@ def _author_representer(dumper, data):
if author.name is None and match.group('name'):
author.name = match.group('name')
authors.add(author)
projects.append({str(repo): authors})

# Dark YAML voodoo
yaml.Dumper.ignore_aliases = lambda *args: True
yaml.Dumper.add_representer(AuthorSet, _authorset_representer)
yaml.Dumper.add_representer(Author, _author_representer)
with open('electorate.yaml', 'w') as f:
yaml.dump(projects, f, default_flow_style=False, default_style='',
explicit_start=True)
projects.append({str(repo): authors})

if len(unknown_logins) > 0:
print("Warning: failed to match some emails. Please follow the commit link and add the email -> id " \
"map to `email_id_map` in generate_electorate.py:")
for email, commit_html in unknown_logins.items():
print("Email", email, "who committed", commit_html)
return projects

def main():
    """Parse the command line, gather contributors and write electorate.yaml.

    Command-line arguments:
        -end    (required) end date of the period to examine, in %d/%m/%y
                format.
        -start  (optional) start date of the period, in the same format.
                When omitted, the period starts 365 days before the end date.

    The per-repository author data returned by find_authors_by_project() is
    dumped as YAML to 'electorate.yaml' in the current working directory.

    Raises:
        ValueError: if either date does not match the %d/%m/%y format
            (raised by datetime.strptime).
    """
    date_format = '%d/%m/%y'

    parser = argparse.ArgumentParser(description='An electorate generation script')
    parser.add_argument("-end", required=True,
                        help='the end date of the period to examine in format %%d/%%m/%%y.')
    # Adjacent literals are concatenated verbatim, so the separating space
    # must be part of one of them ("default to 365", not "default to365").
    # '%' is doubled because argparse %-formats help strings.
    parser.add_argument("-start",
                        help='the start date of the period to examine in format %%d/%%m/%%y. '
                             'If not set will default to 365 days before the end time')

    args = parser.parse_args()
    end_time = datetime.datetime.strptime(args.end, date_format)
    if args.start is not None:
        start_time = datetime.datetime.strptime(args.start, date_format)
    else:
        # No explicit start: look back one year from the end date.
        start_time = end_time - timedelta(days=365)

    print("Getting committers from", start_time, " -> ", end_time)

    projects = find_authors_by_project(start_time, end_time)

    # Dark YAML voodoo
    # Disable anchors/aliases in the output and register the custom
    # representers for the project's AuthorSet and Author types.
    yaml.Dumper.ignore_aliases = lambda *args: True
    yaml.Dumper.add_representer(AuthorSet, _authorset_representer)
    yaml.Dumper.add_representer(Author, _author_representer)
    with open('electorate.yaml', 'w') as f:
        yaml.dump(projects, f, default_flow_style=False, default_style='',
                  explicit_start=True)


# Only run the generator when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Loading