Skip to content

Commit de10cdd

Browse files
committed
rpm_dep_tree: graph of (Build)Requires for a rpm
1 parent c8a6eb8 commit de10cdd

File tree

2 files changed

+253
-0
lines changed

2 files changed

+253
-0
lines changed

scripts/rpmwatcher/repoquery.py

Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
import logging
2+
import os
3+
import re
4+
import subprocess
5+
from typing import Iterable, Sequence
6+
7+
# Template for one repository section of the generated xcpng.repo file.
# Placeholders filled in by setup_xcpng_yum_repos():
#   {section}  repository section name
#   {suffix}   '' for the binary repo, '-src' for the source repo
#   {version}  path component of the baseurl
#   {rpmarch}  binary architecture, or 'Source' for the source repo
# The text starts with an empty line, so consecutive sections written to the
# same file are separated by a blank line.
XCPNG_YUMREPO_TMPL = """
[xcpng-{section}{suffix}]
name=xcpng - {section}{suffix}
baseurl=https://updates.xcp-ng.org/8/{version}/{section}/{rpmarch}/
gpgkey=https://xcp-ng.org/RPM-GPG-KEY-xcpng
failovermethod=priority
skip_if_unavailable=False
"""
15+
16+
# DNF v4 adds an implicit trailing newline to --qf format, but v5 does not,
# so QFNL is appended to every --qf format string built in this module.
#
# The output of `dnf --version` does not start with the bare version number
# on every release (dnf5 prints "dnf5 version X.Y.Z" as its first line), so
# the major version is taken from the first dotted version number found in
# the output instead of from whatever precedes the first '.'.
_dnf_version_output = subprocess.check_output(
    ['dnf', '--version'], universal_newlines=True).strip()
# Kept with its historical shape (output split on '.') for any external user.
dnf_version = _dnf_version_output.split('.')
_dnf_version_match = re.search(r'(\d+)(?:\.\d+)+', _dnf_version_output)
if _dnf_version_match is None:
    raise RuntimeError(
        f"cannot parse dnf version from {_dnf_version_output!r}")
if int(_dnf_version_match.group(1)) >= 5:
    QFNL = "\n"
else:
    QFNL = ""
22+
23+
def setup_xcpng_yum_repos(*, yum_repo_d: str, sections: Iterable[str],
24+
bin_arch: str | None, version: str) -> None:
25+
with open(os.path.join(yum_repo_d, "xcpng.repo"), "w") as yumrepoconf:
26+
for section in sections:
27+
# binaries
28+
block = XCPNG_YUMREPO_TMPL.format(rpmarch=bin_arch,
29+
section=section,
30+
version=version,
31+
suffix='',
32+
)
33+
yumrepoconf.write(block)
34+
# sources
35+
block = XCPNG_YUMREPO_TMPL.format(rpmarch='Source',
36+
section=section,
37+
version=version,
38+
suffix='-src',
39+
)
40+
yumrepoconf.write(block)
41+
42+
# Common prefix of every dnf command line; None until dnf_setup() is called.
DNF_BASE_CMD = None


def dnf_setup(*, dnf_conf: str, yum_repo_d: str) -> None:
    """Record the dnf command prefix used by the query helpers.

    *dnf_conf* is passed to dnf as its ``--config`` file and *yum_repo_d* as
    its ``reposdir`` option.  The result is stored in the module-level
    DNF_BASE_CMD.
    """
    global DNF_BASE_CMD
    base_cmd = ['dnf', '--quiet']
    # NOTE(review): 'WTF' looks like a deliberate placeholder; the repo
    # template in this file has no $releasever to substitute -- confirm.
    base_cmd += ['--releasever', 'WTF']
    base_cmd += ['--config', dnf_conf]
    base_cmd.append(f'--setopt=reposdir={yum_repo_d}')
    DNF_BASE_CMD = base_cmd
50+
51+
# binrpm-nevr -> srpm-nevr, filled by fill_srpm_binrpms_cache()
BINRPM_SOURCE_CACHE: dict[str, str] = {}


def rpm_source_package(rpmname: str) -> str:
    """Return the source package recorded for binary RPM *rpmname*.

    Raises KeyError when *rpmname* is not present in BINRPM_SOURCE_CACHE.
    """
    source = BINRPM_SOURCE_CACHE[rpmname]
    return source
54+
55+
def run_repoquery(args: list[str], split: bool = True) -> str | Sequence[str]:
    """Run ``dnf repoquery`` with *args* and return what it printed.

    The command line is DNF_BASE_CMD followed by ``repoquery`` and *args*;
    dnf_setup() must have been called first (asserted).  The command and its
    stripped output are logged at debug level.

    Returns the stripped output split on whitespace when *split* is true,
    else the stripped output as a single string.  A non-zero dnf exit status
    raises subprocess.CalledProcessError.
    """
    assert DNF_BASE_CMD is not None
    cmd = [*DNF_BASE_CMD, 'repoquery', *args]
    logging.debug('$ %s', ' '.join(cmd))
    raw_output = subprocess.check_output(cmd, universal_newlines=True)
    output = raw_output.strip()
    logging.debug('> %s', output)
    if not split:
        return output
    return output.split()
62+
63+
# srpm-nevr -> set of binrpm-nevr produced by that source package
SRPM_BINRPMS_CACHE: dict[str, set[str]] = {}


def fill_srpm_binrpms_cache() -> None:
    """Rebuild BINRPM_SOURCE_CACHE and SRPM_BINRPMS_CACHE from the repos.

    Two repoquery runs are made: one over the source repos to learn the
    name-evr of each ``.src.rpm`` file name, and one over the binary repos
    to learn which ``.src.rpm`` each binary package comes from.  The two are
    joined into binary -> source, then reversed into source -> binaries.
    """
    global SRPM_BINRPMS_CACHE, BINRPM_SOURCE_CACHE

    # HACK: get nevr for what dnf outputs as %{sourcerpm}
    logging.debug("get epoch info for SRPMs")
    srpm_query = [
        '--disablerepo=*', '--enablerepo=*-src', '*',
        '--qf', '%{name}-%{version}-%{release}.src.rpm,%{name}-%{evr}' + QFNL,
        '--latest-limit=1',
    ]
    nevr_by_sourcerpm: dict[str, str] = {}  # sourcerpm -> srpm-nevr
    for line in run_repoquery(srpm_query):
        sourcerpm, nevr = line.split(',')
        nevr_by_sourcerpm[sourcerpm] = nevr

    # binary -> source mapping
    logging.debug("get binary to source mapping")
    binrpm_query = [
        '--disablerepo=*-src', '*',
        '--qf', '%{name}-%{evr},%{sourcerpm}' + QFNL,  # FIXME no epoch in sourcerpm, why does it work?
        '--latest-limit=1',
    ]
    source_by_binrpm: dict[str, str] = {}
    for line in run_repoquery(binrpm_query):
        binrpm, sourcerpm = line.split(',')
        # packages without source are not in nevr_by_sourcerpm, fallback to
        # sourcerpm (computed unconditionally, so a name not ending in
        # .src.rpm trips the assertion in srpm_strip_src_rpm)
        fallback = srpm_strip_src_rpm(sourcerpm)
        source_by_binrpm[binrpm] = nevr_by_sourcerpm.get(sourcerpm, fallback)
    BINRPM_SOURCE_CACHE = source_by_binrpm

    # reverse mapping source -> binaries
    binrpms_by_srpm: dict[str, set[str]] = {}
    for binrpm, srpm in source_by_binrpm.items():
        binrpms_by_srpm.setdefault(srpm, set()).add(binrpm)
    SRPM_BINRPMS_CACHE = binrpms_by_srpm
100+
101+
def srpm_nevr(rpmname: str) -> str:
    """Return the ``name-evr`` of the latest source RPM matching *rpmname*.

    Only the source repositories are queried.  Asserts that exactly one
    result comes back.
    """
    matches = run_repoquery([
        '--disablerepo=*', '--enablerepo=*-src',
        '--qf=%{name}-%{evr}' + QFNL,  # to get the epoch only when non-zero
        '--latest-limit=1',
        rpmname,
    ])
    assert matches, f"Found no SRPM named {rpmname}"
    assert len(matches) == 1  # ensured by --latest-limit=1 ?
    return matches[0]
112+
113+
# dnf insists on spitting .src.rpm names it cannot take as input itself
def srpm_strip_src_rpm(srpmname: str) -> str:
    """Return *srpmname* without its trailing ``.src.rpm``.

    Asserts that *srpmname* actually ends in ``.src.rpm``.
    """
    suffix = ".src.rpm"
    assert srpmname.endswith(suffix), f"{srpmname} does not end in .src.rpm"
    return srpmname[:-len(suffix)]
119+
120+
def rpm_requires(rpmname: str) -> Sequence[str]:
    """Return the ``name-evr`` of the packages that *rpmname* requires.

    The query runs with the source repositories disabled and asks dnf to
    resolve requirements to the packages providing them.
    """
    return run_repoquery([
        '--disablerepo=*-src',  # else requires of same-name SRPM are included
        '--qf=%{name}-%{evr}' + QFNL,  # to avoid getting the arch and explicit zero epoch
        '--resolve',
        '--requires', rpmname,
    ])
129+
130+
def srpm_requires(srpmname: str) -> set[str]:
    """Return the set of ``name-evr`` packages required by a source RPM.

    *srpmname* is queried as ``<srpmname>.src`` and dnf is asked to resolve
    the requirements to the packages providing them.
    """
    query = [
        '--qf=%{name}-%{evr}' + QFNL,  # to avoid getting the arch
        '--resolve',
        '--requires', f"{srpmname}.src",
    ]
    return set(run_repoquery(query))
138+
139+
def srpm_binrpms(srpmname: str) -> set[str]:
    """Return the binary RPMs recorded in SRPM_BINRPMS_CACHE for *srpmname*.

    A missing entry is logged as an error and trips an assertion.  When
    assertions are disabled (``python -O``) an empty set is returned
    instead, so the return type always matches the annotation.
    """
    ret = SRPM_BINRPMS_CACHE.get(srpmname)
    if ret is None:  # FIXME should not happen
        logging.error("%r not found in cache", srpmname)
        assert False
        # only reached with assertions disabled
        return set()
    logging.debug("binrpms for %s: %s", srpmname, ret)
    return ret
147+
148+
# A ".el<N>" tag, optionally followed by "_<M>", that either ends the string
# or is followed by a dot and anything else.
UPSTREAM_REGEX = re.compile(r'\.el[0-9]+(_[0-9]+)?(\..*|)$')
# Splits "<name>-<version>-<release>" on its last two dashes.
RPM_NVR_SPLIT_REGEX = re.compile(r'^(.+)-([^-]+)-([^-]+)$')


def is_pristine_upstream(rpmname: str) -> bool:
    """Tell whether *rpmname* matches UPSTREAM_REGEX anywhere."""
    return UPSTREAM_REGEX.search(rpmname) is not None

scripts/rpmwatcher/rpm_dep_tree

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
#! /usr/bin/env python3
2+
3+
import atexit
4+
import logging
5+
import re
6+
import sys
7+
import tempfile
8+
9+
import repoquery
10+
11+
# Architecture passed as bin_arch when generating the binary repositories.
ARCH = "x86_64"
# When true, upstream packages are still drawn (greyed nodes, and edges
# pointing to them) instead of being left out of the graph.
SHOW_BOUNDARY = False
13+
14+
# Tell if package is pristine upstream, or part of a well-known list of
# packages we want to consider as "upstream" rather than forks
def is_upstream(rpmname: str) -> bool:
    """Return True when *rpmname* should be treated as an upstream package.

    That is the case when repoquery.is_pristine_upstream() says so, or when
    the name part of *rpmname* (as split by repoquery.RPM_NVR_SPLIT_REGEX)
    is in the hard-coded list below.  Asserts that *rpmname* can be split.
    """
    if repoquery.is_pristine_upstream(rpmname):
        return True
    nvr_match = re.match(repoquery.RPM_NVR_SPLIT_REGEX, rpmname)
    assert nvr_match, f"{rpmname!r} does not match {repoquery.RPM_NVR_SPLIT_REGEX!r}"
    considered_upstream = [
        'systemd', 'util-linux', 'ncurses',
        #'xapi',
        'devtoolset-11-gcc', 'devtoolset-11-binutils',
    ]
    return nvr_match.group(1) in considered_upstream
26+
27+
def main() -> int:
    """Write a Graphviz graph of the (Build)Requires of a source RPM.

    Usage: ``rpm_dep_tree VERSION ROOT_SRPM``

    Starting from ROOT_SRPM, source packages are explored round by round.
    For each non-upstream source package, its build requirements and the
    runtime requirements of the binary RPMs it produces are mapped back to
    source packages and drawn as edges (plain for build requirements,
    dotted for runtime-only ones).  The graph is written to
    ``<ROOT_SRPM>-<VERSION>.dot`` in the current directory.

    Returns 0 on success, 2 when the command line is malformed.
    """
    logging.basicConfig(format='[%(levelname)s] %(message)s', level=logging.DEBUG)

    # Report a wrong argument count as a usage error, not an unpacking traceback.
    if len(sys.argv) != 3:
        prog = sys.argv[0] if sys.argv else "rpm_dep_tree"
        print(f"Usage: {prog} VERSION ROOT_SRPM", file=sys.stderr)
        return 2
    _this_exe, version, root_srpm = sys.argv

    with (tempfile.NamedTemporaryFile() as dnfconf,
          tempfile.TemporaryDirectory() as yumrepod,
          open(f"{root_srpm}-{version}.dot", "w") as dotfile):

        repoquery.setup_xcpng_yum_repos(yum_repo_d=yumrepod,
                                        sections=['base', 'updates'],
                                        bin_arch=ARCH,
                                        version=version)
        repoquery.dnf_setup(dnf_conf=dnfconf.name, yum_repo_d=yumrepod)

        repoquery.fill_srpm_binrpms_cache()

        print("digraph packages {", file=dotfile)
        srpms_seen: set[str] = set()
        new_srpms = {repoquery.srpm_nevr(root_srpm)}
        while new_srpms:
            next_srpms: set[str] = set()  # preparing next round's new_srpms
            logging.info("seen: %s, new: %s", len(srpms_seen), len(new_srpms))
            logging.debug(" new: %s", new_srpms)

            # The whole round counts as seen before any of its members is
            # expanded, so members of this round are never queued again.
            srpms_seen.update(new_srpms)

            for srpm in new_srpms:
                # draw source packages themselves
                if is_upstream(srpm):
                    if SHOW_BOUNDARY:
                        print(f'"{srpm}" [color=grey];', file=dotfile)
                    logging.debug("skipping upstream %s", srpm)
                    continue  # we don't rebuild upstream rpms
                # NOTE(review): this condition used to be the bare string
                # literal ".xcpng8.3.", which is always true and made the
                # red branch unreachable.  The tag is matched as a
                # dot-delimited component; a dot is appended because the
                # name-evr may end right after the tag.  Confirm this is the
                # intended test.
                elif ".xcpng8.3." in f"{srpm}.":
                    print(f'"{srpm}";', file=dotfile)
                else:
                    print(f'"{srpm}" [color=red];', file=dotfile)

                # build reqs
                breqs = {repoquery.rpm_source_package(breq)
                         for breq in repoquery.srpm_requires(srpm)}
                logging.debug("%s req sources: %s", len(breqs), breqs)

                # reqs of binary rpms produced
                reqs: set[str] = set()
                for binrpm in repoquery.srpm_binrpms(srpm):
                    reqs.update(repoquery.rpm_source_package(req)
                                for req in repoquery.rpm_requires(binrpm))

                # draw breqs, plain
                for breq in breqs:
                    if (not SHOW_BOUNDARY) and is_upstream(breq):
                        continue
                    print(f'"{srpm}" -> "{breq}";', file=dotfile)
                # draw additional runtime reqs, dotted
                for req in reqs.difference(breqs):
                    if (not SHOW_BOUNDARY) and is_upstream(req):
                        continue
                    if srpm == req:
                        continue  # dependency between RPMs of this SRPM
                    print(f'"{srpm}" -> "{req}" [style=dotted];', file=dotfile)

                # accumulate
                next_srpms.update(breqs.difference(srpms_seen))
                next_srpms.update(reqs.difference(srpms_seen))

            new_srpms = next_srpms

        print("}", file=dotfile)
    return 0


if __name__ == "__main__":
    sys.exit(main())

0 commit comments

Comments
 (0)