21
21
from datadog import api , initialize
22
22
23
23
from ddev .cli .application import Application
24
+ from ddev .utils .toml import load_toml_file
24
25
25
26
METRIC_VERSION = 2
26
27
@@ -108,8 +109,9 @@ def get_valid_versions(repo_path: Path | str) -> set[str]:
108
109
"""
109
110
resolved_path = os .path .join (repo_path , os .path .join (repo_path , ".deps" , "resolved" ))
110
111
versions = []
112
+ pattern = re .compile (r"\d+\.\d+" )
111
113
for file in os .listdir (resolved_path ):
112
- match = re .search (r"\d+\.\d+" , file )
114
+ match = pattern .search (file )
113
115
if match :
114
116
versions .append (match .group ())
115
117
return set (versions )
@@ -119,7 +121,40 @@ def is_correct_dependency(platform: str, version: str, name: str) -> bool:
119
121
return platform in name and version in name
120
122
121
123
122
- def is_valid_integration (path : str , included_folder : str , ignored_files : set [str ], git_ignore : list [str ]) -> bool :
124
+ def is_valid_integration_file (
125
+ path : str ,
126
+ repo_path : str ,
127
+ ignored_files : set [str ] | None = None ,
128
+ included_folder : str | None = None ,
129
+ git_ignore : list [str ] | None = None ,
130
+ ) -> bool :
131
+ """
132
+ Check if a file would be packaged with an integration.
133
+
134
+ Used to estimate integration package size by excluding:
135
+ - Hidden files (starting with ".")
136
+ - Files outside "datadog_checks"
137
+ - Helper/test-only packages (e.g. datadog_checks_dev)
138
+ - Files ignored by .gitignore
139
+
140
+ Args:
141
+ path (str): File path to check.
142
+ repo_path (str): Repository root, for loading .gitignore rules.
143
+
144
+ Returns:
145
+ bool: True if the file would be packaged, False otherwise.
146
+ """
147
+ if ignored_files is None :
148
+ ignored_files = {
149
+ "datadog_checks_dev" ,
150
+ "datadog_checks_tests_helper" ,
151
+ }
152
+
153
+ if included_folder is None :
154
+ included_folder = "datadog_checks" + os .sep
155
+
156
+ if git_ignore is None :
157
+ git_ignore = get_gitignore_files (repo_path )
123
158
# It is not an integration
124
159
if path .startswith ("." ):
125
160
return False
@@ -166,29 +201,31 @@ def compress(file_path: str) -> int:
166
201
return compressed_size
167
202
168
203
169
- def get_files (repo_path : str | Path , compressed : bool ) -> list [FileDataEntry ]:
204
+ def get_files (repo_path : str | Path , compressed : bool , py_version : str ) -> list [FileDataEntry ]:
170
205
"""
171
206
Calculates integration file sizes and versions from a repository.
207
+ Only takes into account integrations with a valid version looking at the pyproject.toml file
208
+ The pyproject.toml file should have a classifier with this format:
209
+ classifiers = [
210
+ ...
211
+ "Programming Language :: Python :: 3.12",
212
+ ...
213
+ ]
172
214
"""
173
- ignored_files = {"datadog_checks_dev" , "datadog_checks_tests_helper" }
174
- git_ignore = get_gitignore_files (repo_path )
175
- included_folder = "datadog_checks" + os .sep
176
-
177
215
integration_sizes : dict [str , int ] = {}
178
216
integration_versions : dict [str , str ] = {}
217
+ py_major_version = py_version .split ("." )[0 ]
179
218
180
219
for root , _ , files in os .walk (repo_path ):
220
+ integration_name = str (os .path .relpath (root , repo_path ).split (os .sep )[0 ])
221
+
222
+ if not check_python_version (str (repo_path ), integration_name , py_major_version ):
223
+ continue
181
224
for file in files :
182
225
file_path = os .path .join (root , file )
183
226
relative_path = os .path .relpath (file_path , repo_path )
184
-
185
- if not is_valid_integration (relative_path , included_folder , ignored_files , git_ignore ):
227
+ if not is_valid_integration_file (relative_path , str (repo_path )):
186
228
continue
187
- path = Path (relative_path )
188
- parts = path .parts
189
-
190
- integration_name = parts [0 ]
191
-
192
229
size = compress (file_path ) if compressed else os .path .getsize (file_path )
193
230
integration_sizes [integration_name ] = integration_sizes .get (integration_name , 0 ) + size
194
231
@@ -208,6 +245,23 @@ def get_files(repo_path: str | Path, compressed: bool) -> list[FileDataEntry]:
208
245
]
209
246
210
247
248
def check_python_version(repo_path: str, integration_name: str, py_major_version: str) -> bool:
    """
    Check whether an integration declares support for a given Python major version.

    Reads the integration's ``pyproject.toml`` and inspects its trove classifiers
    for entries of the form ``Programming Language :: Python :: <major>[.<minor>]``.

    Args:
        repo_path (str): Repository root containing the integration folders.
        integration_name (str): Name of the integration directory under the repo root.
        py_major_version (str): Python major version to match (e.g. "3").

    Returns:
        bool: True if any classifier declares the requested major version;
        False if the file is missing, has no project classifiers, or no
        classifier matches.
    """
    pyproject_path = os.path.join(repo_path, integration_name, "pyproject.toml")
    if not os.path.exists(pyproject_path):
        return False

    pyproject = load_toml_file(pyproject_path)
    if "project" not in pyproject or "classifiers" not in pyproject["project"]:
        return False

    pattern = re.compile(r"Programming Language :: Python :: (\d+)")
    # Accept the integration if ANY classifier matches the requested major.
    # The previous last-match-wins loop depended on classifier order and could
    # reject a package that lists several supported majors (e.g. both 2 and 3).
    return any(
        (match := pattern.match(classifier)) is not None and match.group(1) == py_major_version
        for classifier in pyproject["project"]["classifiers"]
    )
263
+
264
+
211
265
def extract_version_from_about_py (path : str ) -> str :
212
266
"""
213
267
Extracts the __version__ string from a given __about__.py file.
@@ -248,8 +302,9 @@ def get_dependencies_list(file_path: str) -> tuple[list[str], list[str], list[st
248
302
versions = []
249
303
with open (file_path , "r" , encoding = "utf-8" ) as file :
250
304
file_content = file .read ()
305
+ pattern = re .compile (r"([\w\-\d\.]+) @ (https?://[^\s#]+)" )
251
306
for line in file_content .splitlines ():
252
- match = re .search (r"([\w\-\d\.]+) @ (https?://[^\s#]+)" , line )
307
+ match = pattern .search (line )
253
308
if not match :
254
309
raise WrongDependencyFormat ("The dependency format 'name @ link' is no longer supported." )
255
310
name = match .group (1 )
@@ -327,43 +382,43 @@ def get_dependencies_sizes(
327
382
328
383
329
384
def is_excluded_from_wheel (path : str ) -> bool :
330
- '''
385
+ """
331
386
These files are excluded from the wheel in the agent build:
332
387
https://github.com/DataDog/datadog-agent/blob/main/omnibus/config/software/datadog-agent-integrations-py3.rb
333
388
In order to have more accurate results, these files are excluded when computing the size of the dependencies while
334
389
the wheels still include them.
335
- '''
390
+ """
336
391
excluded_test_paths = [
337
392
os .path .normpath (path )
338
393
for path in [
339
- ' idlelib/idle_test' ,
340
- ' bs4/tests' ,
341
- ' Cryptodome/SelfTest' ,
342
- ' gssapi/tests' ,
343
- ' keystoneauth1/tests' ,
344
- ' openstack/tests' ,
345
- ' os_service_types/tests' ,
346
- ' pbr/tests' ,
347
- ' pkg_resources/tests' ,
348
- ' psutil/tests' ,
349
- ' securesystemslib/_vendor/ed25519/test_data' ,
350
- ' setuptools/_distutils/tests' ,
351
- ' setuptools/tests' ,
352
- ' simplejson/tests' ,
353
- ' stevedore/tests' ,
354
- ' supervisor/tests' ,
355
- ' test' , # cm-client
356
- ' vertica_python/tests' ,
357
- ' websocket/tests' ,
394
+ " idlelib/idle_test" ,
395
+ " bs4/tests" ,
396
+ " Cryptodome/SelfTest" ,
397
+ " gssapi/tests" ,
398
+ " keystoneauth1/tests" ,
399
+ " openstack/tests" ,
400
+ " os_service_types/tests" ,
401
+ " pbr/tests" ,
402
+ " pkg_resources/tests" ,
403
+ " psutil/tests" ,
404
+ " securesystemslib/_vendor/ed25519/test_data" ,
405
+ " setuptools/_distutils/tests" ,
406
+ " setuptools/tests" ,
407
+ " simplejson/tests" ,
408
+ " stevedore/tests" ,
409
+ " supervisor/tests" ,
410
+ " test" , # cm-client
411
+ " vertica_python/tests" ,
412
+ " websocket/tests" ,
358
413
]
359
414
]
360
415
361
416
type_annot_libraries = [
362
- ' krb5' ,
363
- ' Cryptodome' ,
364
- ' ddtrace' ,
365
- ' pyVmomi' ,
366
- ' gssapi' ,
417
+ " krb5" ,
418
+ " Cryptodome" ,
419
+ " ddtrace" ,
420
+ " pyVmomi" ,
421
+ " gssapi" ,
367
422
]
368
423
rel_path = Path (path ).as_posix ()
369
424
@@ -377,7 +432,7 @@ def is_excluded_from_wheel(path: str) -> bool:
377
432
if path_parts :
378
433
dependency_name = path_parts [0 ]
379
434
if dependency_name in type_annot_libraries :
380
- if path .endswith (' .pyi' ) or os .path .basename (path ) == ' py.typed' :
435
+ if path .endswith (" .pyi" ) or os .path .basename (path ) == " py.typed" :
381
436
return True
382
437
383
438
return False
@@ -830,14 +885,14 @@ def send_metrics_to_dd(
830
885
],
831
886
}
832
887
)
833
- key_count = (item [' Platform' ], item [' Python_Version' ])
888
+ key_count = (item [" Platform" ], item [" Python_Version" ])
834
889
if key_count not in n_integrations :
835
890
n_integrations [key_count ] = 0
836
891
if key_count not in n_dependencies :
837
892
n_dependencies [key_count ] = 0
838
- if item [' Type' ] == ' Integration' :
893
+ if item [" Type" ] == " Integration" :
839
894
n_integrations [key_count ] += 1
840
- elif item [' Type' ] == ' Dependency' :
895
+ elif item [" Type" ] == " Dependency" :
841
896
n_dependencies [key_count ] += 1
842
897
843
898
for (platform , py_version ), count in n_integrations .items ():
@@ -919,8 +974,8 @@ def get_last_commit_timestamp() -> int:
919
974
920
975
def get_last_commit_data () -> tuple [str , list [str ], list [str ]]:
921
976
result = subprocess .run (["git" , "log" , "-1" , "--format=%s" ], capture_output = True , text = True , check = True )
922
- ticket_pattern = r' \b(?:DBMON|SAASINT|AGENT|AI)-\d+\b'
923
- pr_pattern = r' #(\d+)'
977
+ ticket_pattern = r" \b(?:DBMON|SAASINT|AGENT|AI)-\d+\b"
978
+ pr_pattern = r" #(\d+)"
924
979
925
980
message = result .stdout .strip ()
926
981
tickets = re .findall (ticket_pattern , message )
0 commit comments