Skip to content

Commit 0f55cd7

Browse files
authored
[AI-5518] Add Python version filtering to integrations to the ddev size command (#20742)
* wip * fix tests * comment * changelog * remove print * rename variables and method * rewrite changelog and change is_valid_integration method name * put comment in is_valid_integration and change regex
1 parent 0c18c2a commit 0f55cd7

File tree

9 files changed

+150
-89
lines changed

9 files changed

+150
-89
lines changed

ddev/changelog.d/20742.added

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Adds logic to ensure `ddev size` filters integrations by the specified Python version.

ddev/src/ddev/cli/size/diff.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -204,13 +204,13 @@ def get_repo_info(
204204
repo = gitRepo.repo_dir
205205
task = progress.add_task("[cyan]Calculating sizes for the first commit...", total=None)
206206
gitRepo.checkout_commit(first_commit)
207-
files_b = get_files(repo, compressed)
207+
files_b = get_files(repo, compressed, version)
208208
dependencies_b = get_dependencies(repo, platform, version, compressed)
209209
progress.remove_task(task)
210210

211211
task = progress.add_task("[cyan]Calculating sizes for the second commit...", total=None)
212212
gitRepo.checkout_commit(second_commit)
213-
files_a = get_files(repo, compressed)
213+
files_a = get_files(repo, compressed, version)
214214
dependencies_a = get_dependencies(repo, platform, version, compressed)
215215
progress.remove_task(task)
216216

ddev/src/ddev/cli/size/status.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ def status_mode(
9494
params: CLIParameters,
9595
) -> list[FileDataEntryPlatformVersion]:
9696
with console.status("[cyan]Calculating sizes...", spinner="dots"):
97-
modules = get_files(repo_path, params["compressed"]) + get_dependencies(
97+
modules = get_files(repo_path, params["compressed"], params["version"]) + get_dependencies(
9898
repo_path, params["platform"], params["version"], params["compressed"]
9999
)
100100

ddev/src/ddev/cli/size/timeline.py

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,10 @@
2525
compress,
2626
convert_to_human_readable_size,
2727
extract_version_from_about_py,
28-
get_gitignore_files,
2928
get_valid_platforms,
3029
get_valid_versions,
3130
is_correct_dependency,
32-
is_valid_integration,
31+
is_valid_integration_file,
3332
print_table,
3433
save_csv,
3534
save_json,
@@ -454,10 +453,6 @@ def get_files(
454453
)
455454
return file_data
456455

457-
ignored_files = {"datadog_checks_dev", "datadog_checks_tests_helper"}
458-
git_ignore = get_gitignore_files(repo_path)
459-
included_folder = "datadog_checks" + os.sep
460-
461456
total_size = 0
462457
version = ""
463458

@@ -466,7 +461,7 @@ def get_files(
466461
file_path = os.path.join(root, file)
467462
relative_path = os.path.relpath(file_path, repo_path)
468463

469-
if not is_valid_integration(relative_path, included_folder, ignored_files, git_ignore):
464+
if not is_valid_integration_file(relative_path, repo_path):
470465
continue
471466

472467
if file == "__about__.py" and "datadog_checks" in relative_path:

ddev/src/ddev/cli/size/utils/common_funcs.py

Lines changed: 102 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
from datadog import api, initialize
2222

2323
from ddev.cli.application import Application
24+
from ddev.utils.toml import load_toml_file
2425

2526
METRIC_VERSION = 2
2627

@@ -108,8 +109,9 @@ def get_valid_versions(repo_path: Path | str) -> set[str]:
108109
"""
109110
resolved_path = os.path.join(repo_path, os.path.join(repo_path, ".deps", "resolved"))
110111
versions = []
112+
pattern = re.compile(r"\d+\.\d+")
111113
for file in os.listdir(resolved_path):
112-
match = re.search(r"\d+\.\d+", file)
114+
match = pattern.search(file)
113115
if match:
114116
versions.append(match.group())
115117
return set(versions)
@@ -119,7 +121,40 @@ def is_correct_dependency(platform: str, version: str, name: str) -> bool:
119121
return platform in name and version in name
120122

121123

122-
def is_valid_integration(path: str, included_folder: str, ignored_files: set[str], git_ignore: list[str]) -> bool:
124+
def is_valid_integration_file(
125+
path: str,
126+
repo_path: str,
127+
ignored_files: set[str] | None = None,
128+
included_folder: str | None = None,
129+
git_ignore: list[str] | None = None,
130+
) -> bool:
131+
"""
132+
Check if a file would be packaged with an integration.
133+
134+
Used to estimate integration package size by excluding:
135+
- Hidden files (starting with ".")
136+
- Files outside "datadog_checks"
137+
- Helper/test-only packages (e.g. datadog_checks_dev)
138+
- Files ignored by .gitignore
139+
140+
Args:
141+
path (str): File path to check.
142+
repo_path (str): Repository root, for loading .gitignore rules.
143+
144+
Returns:
145+
bool: True if the file would be packaged, False otherwise.
146+
"""
147+
if ignored_files is None:
148+
ignored_files = {
149+
"datadog_checks_dev",
150+
"datadog_checks_tests_helper",
151+
}
152+
153+
if included_folder is None:
154+
included_folder = "datadog_checks" + os.sep
155+
156+
if git_ignore is None:
157+
git_ignore = get_gitignore_files(repo_path)
123158
# It is not an integration
124159
if path.startswith("."):
125160
return False
@@ -166,29 +201,31 @@ def compress(file_path: str) -> int:
166201
return compressed_size
167202

168203

169-
def get_files(repo_path: str | Path, compressed: bool) -> list[FileDataEntry]:
204+
def get_files(repo_path: str | Path, compressed: bool, py_version: str) -> list[FileDataEntry]:
170205
"""
171206
Calculates integration file sizes and versions from a repository.
207+
Only integrations whose pyproject.toml file declares support for the requested Python major version are taken into account.
208+
The pyproject.toml file should have a classifier with this format:
209+
classifiers = [
210+
...
211+
"Programming Language :: Python :: 3.12",
212+
...
213+
]
172214
"""
173-
ignored_files = {"datadog_checks_dev", "datadog_checks_tests_helper"}
174-
git_ignore = get_gitignore_files(repo_path)
175-
included_folder = "datadog_checks" + os.sep
176-
177215
integration_sizes: dict[str, int] = {}
178216
integration_versions: dict[str, str] = {}
217+
py_major_version = py_version.split(".")[0]
179218

180219
for root, _, files in os.walk(repo_path):
220+
integration_name = str(os.path.relpath(root, repo_path).split(os.sep)[0])
221+
222+
if not check_python_version(str(repo_path), integration_name, py_major_version):
223+
continue
181224
for file in files:
182225
file_path = os.path.join(root, file)
183226
relative_path = os.path.relpath(file_path, repo_path)
184-
185-
if not is_valid_integration(relative_path, included_folder, ignored_files, git_ignore):
227+
if not is_valid_integration_file(relative_path, str(repo_path)):
186228
continue
187-
path = Path(relative_path)
188-
parts = path.parts
189-
190-
integration_name = parts[0]
191-
192229
size = compress(file_path) if compressed else os.path.getsize(file_path)
193230
integration_sizes[integration_name] = integration_sizes.get(integration_name, 0) + size
194231

@@ -208,6 +245,23 @@ def get_files(repo_path: str | Path, compressed: bool) -> list[FileDataEntry]:
208245
]
209246

210247

248+
def check_python_version(repo_path: str, integration_name: str, py_major_version: str) -> bool:
    """
    Check whether an integration declares support for a Python major version.

    Reads `<repo_path>/<integration_name>/pyproject.toml` and inspects the
    `classifiers` list of its `project` table for entries of the form
    "Programming Language :: Python :: 3.12". Only the major component (the
    first run of digits after the prefix) of each classifier is compared.

    Args:
        repo_path (str): Repository root containing the integration folders.
        integration_name (str): Top-level folder name of the integration.
        py_major_version (str): Python major version to look for, e.g. "3".

    Returns:
        bool: True if at least one classifier declares the requested major
        version. False if the pyproject.toml file, its "project" table, its
        "classifiers" key, or a matching classifier is missing.
    """
    pyproject_path = os.path.join(repo_path, integration_name, "pyproject.toml")
    if not os.path.exists(pyproject_path):
        return False

    pyproject = load_toml_file(pyproject_path)
    if "project" not in pyproject or "classifiers" not in pyproject["project"]:
        return False

    pattern = re.compile(r"Programming Language :: Python :: (\d+)")
    # An integration may list several Python versions (e.g. 2.7 and 3.12).
    # Accept it as soon as any classifier matches, instead of comparing only
    # against the last matching classifier in the list.
    for classifier in pyproject["project"]["classifiers"]:
        match = pattern.match(classifier)
        if match and match.group(1) == py_major_version:
            return True
    return False
263+
264+
211265
def extract_version_from_about_py(path: str) -> str:
212266
"""
213267
Extracts the __version__ string from a given __about__.py file.
@@ -248,8 +302,9 @@ def get_dependencies_list(file_path: str) -> tuple[list[str], list[str], list[st
248302
versions = []
249303
with open(file_path, "r", encoding="utf-8") as file:
250304
file_content = file.read()
305+
pattern = re.compile(r"([\w\-\d\.]+) @ (https?://[^\s#]+)")
251306
for line in file_content.splitlines():
252-
match = re.search(r"([\w\-\d\.]+) @ (https?://[^\s#]+)", line)
307+
match = pattern.search(line)
253308
if not match:
254309
raise WrongDependencyFormat("The dependency format 'name @ link' is no longer supported.")
255310
name = match.group(1)
@@ -327,43 +382,43 @@ def get_dependencies_sizes(
327382

328383

329384
def is_excluded_from_wheel(path: str) -> bool:
330-
'''
385+
"""
331386
These files are excluded from the wheel in the agent build:
332387
https://github.com/DataDog/datadog-agent/blob/main/omnibus/config/software/datadog-agent-integrations-py3.rb
333388
In order to have more accurate results, these files are excluded when computing the size of the dependencies while
334389
the wheels still include them.
335-
'''
390+
"""
336391
excluded_test_paths = [
337392
os.path.normpath(path)
338393
for path in [
339-
'idlelib/idle_test',
340-
'bs4/tests',
341-
'Cryptodome/SelfTest',
342-
'gssapi/tests',
343-
'keystoneauth1/tests',
344-
'openstack/tests',
345-
'os_service_types/tests',
346-
'pbr/tests',
347-
'pkg_resources/tests',
348-
'psutil/tests',
349-
'securesystemslib/_vendor/ed25519/test_data',
350-
'setuptools/_distutils/tests',
351-
'setuptools/tests',
352-
'simplejson/tests',
353-
'stevedore/tests',
354-
'supervisor/tests',
355-
'test', # cm-client
356-
'vertica_python/tests',
357-
'websocket/tests',
394+
"idlelib/idle_test",
395+
"bs4/tests",
396+
"Cryptodome/SelfTest",
397+
"gssapi/tests",
398+
"keystoneauth1/tests",
399+
"openstack/tests",
400+
"os_service_types/tests",
401+
"pbr/tests",
402+
"pkg_resources/tests",
403+
"psutil/tests",
404+
"securesystemslib/_vendor/ed25519/test_data",
405+
"setuptools/_distutils/tests",
406+
"setuptools/tests",
407+
"simplejson/tests",
408+
"stevedore/tests",
409+
"supervisor/tests",
410+
"test", # cm-client
411+
"vertica_python/tests",
412+
"websocket/tests",
358413
]
359414
]
360415

361416
type_annot_libraries = [
362-
'krb5',
363-
'Cryptodome',
364-
'ddtrace',
365-
'pyVmomi',
366-
'gssapi',
417+
"krb5",
418+
"Cryptodome",
419+
"ddtrace",
420+
"pyVmomi",
421+
"gssapi",
367422
]
368423
rel_path = Path(path).as_posix()
369424

@@ -377,7 +432,7 @@ def is_excluded_from_wheel(path: str) -> bool:
377432
if path_parts:
378433
dependency_name = path_parts[0]
379434
if dependency_name in type_annot_libraries:
380-
if path.endswith('.pyi') or os.path.basename(path) == 'py.typed':
435+
if path.endswith(".pyi") or os.path.basename(path) == "py.typed":
381436
return True
382437

383438
return False
@@ -830,14 +885,14 @@ def send_metrics_to_dd(
830885
],
831886
}
832887
)
833-
key_count = (item['Platform'], item['Python_Version'])
888+
key_count = (item["Platform"], item["Python_Version"])
834889
if key_count not in n_integrations:
835890
n_integrations[key_count] = 0
836891
if key_count not in n_dependencies:
837892
n_dependencies[key_count] = 0
838-
if item['Type'] == 'Integration':
893+
if item["Type"] == "Integration":
839894
n_integrations[key_count] += 1
840-
elif item['Type'] == 'Dependency':
895+
elif item["Type"] == "Dependency":
841896
n_dependencies[key_count] += 1
842897

843898
for (platform, py_version), count in n_integrations.items():
@@ -919,8 +974,8 @@ def get_last_commit_timestamp() -> int:
919974

920975
def get_last_commit_data() -> tuple[str, list[str], list[str]]:
921976
result = subprocess.run(["git", "log", "-1", "--format=%s"], capture_output=True, text=True, check=True)
922-
ticket_pattern = r'\b(?:DBMON|SAASINT|AGENT|AI)-\d+\b'
923-
pr_pattern = r'#(\d+)'
977+
ticket_pattern = r"\b(?:DBMON|SAASINT|AGENT|AI)-\d+\b"
978+
pr_pattern = r"#(\d+)"
924979

925980
message = result.stdout.strip()
926981
tickets = re.findall(ticket_pattern, message)

ddev/tests/cli/size/test_diff.py

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ def mock_size_diff_dependencies():
1818
mock_git_repo.repo_dir = "fake_repo"
1919
mock_git_repo.get_commit_metadata.return_value = ("Feb 1 2025", "", "")
2020

21-
def get_compressed_files_side_effect(_, __):
21+
def get_compressed_files_side_effect(*args, **kwargs):
2222
get_compressed_files_side_effect.counter += 1
2323
if get_compressed_files_side_effect.counter % 2 == 1:
2424
return [{"Name": "path1.py", "Version": "1.1.1", "Size_Bytes": 1000, "Type": "Integration"}] # before
@@ -30,7 +30,7 @@ def get_compressed_files_side_effect(_, __):
3030

3131
get_compressed_files_side_effect.counter = 0
3232

33-
def get_compressed_dependencies_side_effect(_, __, ___, ____):
33+
def get_compressed_dependencies_side_effect(*args, **kwargs):
3434
get_compressed_dependencies_side_effect.counter += 1
3535
if get_compressed_dependencies_side_effect.counter % 2 == 1:
3636
return [{"Name": "dep1", "Version": "1.0.0", "Size_Bytes": 2000, "Type": "Dependency"}] # before
@@ -45,11 +45,11 @@ def get_compressed_dependencies_side_effect(_, __, ___, ____):
4545
with (
4646
patch(
4747
"ddev.cli.size.diff.get_valid_platforms",
48-
return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'macos-aarch64', 'windows-x86_64'}),
48+
return_value=({"linux-x86_64", "macos-x86_64", "linux-aarch64", "macos-aarch64", "windows-x86_64"}),
4949
),
5050
patch(
5151
"ddev.cli.size.diff.get_valid_versions",
52-
return_value=({'3.12'}),
52+
return_value=({"3.12"}),
5353
),
5454
patch("ddev.cli.size.diff.GitRepo.__enter__", return_value=mock_git_repo),
5555
patch("ddev.cli.size.diff.GitRepo.__exit__", return_value=None),
@@ -111,11 +111,11 @@ def test_diff_no_differences(ddev):
111111
patch("ddev.cli.size.diff.GitRepo.__exit__", return_value=None),
112112
patch(
113113
"ddev.cli.size.diff.get_valid_platforms",
114-
return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'macos-aarch64', 'windows-x86_64'}),
114+
return_value=({"linux-x86_64", "macos-x86_64", "linux-aarch64", "macos-aarch64", "windows-x86_64"}),
115115
),
116116
patch(
117117
"ddev.cli.size.diff.get_valid_versions",
118-
return_value=({'3.12'}),
118+
return_value=({"3.12"}),
119119
),
120120
patch.object(fake_repo, "checkout_commit"),
121121
patch("ddev.cli.size.utils.common_funcs.tempfile.mkdtemp", return_value="fake_repo"),
@@ -162,11 +162,11 @@ def test_diff_invalid_platform(ddev):
162162
patch("ddev.cli.size.diff.GitRepo", return_value=mock_git_repo),
163163
patch(
164164
"ddev.cli.size.diff.get_valid_platforms",
165-
return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'macos-aarch64', 'windows-x86_64'}),
165+
return_value=({"linux-x86_64", "macos-x86_64", "linux-aarch64", "macos-aarch64", "windows-x86_64"}),
166166
),
167167
patch(
168168
"ddev.cli.size.diff.get_valid_versions",
169-
return_value=({'3.12'}),
169+
return_value=({"3.12"}),
170170
),
171171
):
172172
result = ddev("size", "diff", "commit1", "commit2", "--platform", "linux", "--python", "3.12", "--compressed")
@@ -184,11 +184,11 @@ def test_diff_invalid_version(ddev):
184184
patch("ddev.cli.size.diff.GitRepo", return_value=mock_git_repo),
185185
patch(
186186
"ddev.cli.size.diff.get_valid_platforms",
187-
return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'macos-aarch64', 'windows-x86_64'}),
187+
return_value=({"linux-x86_64", "macos-x86_64", "linux-aarch64", "macos-aarch64", "windows-x86_64"}),
188188
),
189189
patch(
190190
"ddev.cli.size.diff.get_valid_versions",
191-
return_value=({'3.12'}),
191+
return_value=({"3.12"}),
192192
),
193193
):
194194
result = ddev(
@@ -215,11 +215,11 @@ def test_diff_invalid_platform_and_version(ddev):
215215
patch("ddev.cli.size.diff.GitRepo", return_value=mock_git_repo),
216216
patch(
217217
"ddev.cli.size.diff.get_valid_platforms",
218-
return_value=({'linux-x86_64', 'macos-x86_64', 'linux-aarch64', 'macos-aarch64', 'windows-x86_64'}),
218+
return_value=({"linux-x86_64", "macos-x86_64", "linux-aarch64", "macos-aarch64", "windows-x86_64"}),
219219
),
220220
patch(
221221
"ddev.cli.size.diff.get_valid_versions",
222-
return_value=({'3.12'}),
222+
return_value=({"3.12"}),
223223
),
224224
):
225225
result = ddev("size", "diff", "commit1", "commit2", "--platform", "linux", "--python", "2.10", "--compressed")

0 commit comments

Comments
 (0)