Skip to content

Commit 425dad0

Browse files
[None][fix] Clean up linking to CUDA stub libraries in build_wheel.py (#6823)
Signed-off-by: Linda-Stadter <[email protected]> Signed-off-by: Martin Marciniszyn Mehringer <[email protected]> Co-authored-by: Linda-Stadter <[email protected]>
1 parent 1ce2354 commit 425dad0

File tree

4 files changed

+153
-81
lines changed

4 files changed

+153
-81
lines changed

cpp/tensorrt_llm/nanobind/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ target_link_libraries(
4343
${Python3_LIBRARIES}
4444
${TORCH_LIBRARIES}
4545
torch_python
46+
CUDA::cuda_driver
4647
${CUDA_NVML_LIB}
4748
th_common)
4849
target_compile_definitions(
@@ -54,6 +55,6 @@ if(NOT WIN32)
5455
${TRTLLM_NB_MODULE}
5556
PROPERTIES
5657
LINK_FLAGS
57-
"-Wl,-rpath,'$ORIGIN/libs' -Wl,-rpath,'$ORIGIN/../nvidia/nccl/lib' -Wl,-rpath,'${CUDA_TOOLKIT_ROOT_DIR}/targets/x86_64-linux/lib/stubs' ${AS_NEEDED_FLAG} ${UNDEFINED_FLAG}"
58+
"-Wl,-rpath,'$ORIGIN/libs' -Wl,-rpath,'$ORIGIN/../nvidia/nccl/lib' ${AS_NEEDED_FLAG} ${UNDEFINED_FLAG}"
5859
)
5960
endif()

cpp/tensorrt_llm/pybind/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ target_link_libraries(
4444
${Python3_LIBRARIES}
4545
${TORCH_LIBRARIES}
4646
torch_python
47+
CUDA::cuda_driver
4748
${CUDA_NVML_LIB}
4849
th_common)
4950
target_compile_definitions(
@@ -55,6 +56,6 @@ if(NOT WIN32)
5556
${TRTLLM_PYBIND_MODULE}
5657
PROPERTIES
5758
LINK_FLAGS
58-
"-Wl,-rpath,'$ORIGIN/libs' -Wl,-rpath,'$ORIGIN/../nvidia/nccl/lib' -Wl,-rpath,'${CUDA_TOOLKIT_ROOT_DIR}/targets/x86_64-linux/lib/stubs' ${AS_NEEDED_FLAG} ${UNDEFINED_FLAG}"
59+
"-Wl,-rpath,'$ORIGIN/libs' -Wl,-rpath,'$ORIGIN/../nvidia/nccl/lib' ${AS_NEEDED_FLAG} ${UNDEFINED_FLAG}"
5960
)
6061
endif()

docker/Dockerfile.multi

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,8 +71,9 @@ RUN bash ./install_pytorch.sh $TORCH_INSTALL_TYPE && rm install_pytorch.sh
7171
ENV PYTORCH_CUDA_ALLOC_CONF="garbage_collection_threshold:0.99999"
7272

7373
# Install OpenCV with FFMPEG support
74-
RUN pip3 uninstall -y opencv && rm -rf /usr/local/lib/python3*/dist-packages/cv2/
75-
RUN pip3 install opencv-python-headless --force-reinstall --no-deps --no-cache-dir
74+
RUN pip3 uninstall -y opencv && \
75+
rm -rf /usr/local/lib/python3*/dist-packages/cv2/ && \
76+
pip3 install opencv-python-headless --force-reinstall --no-deps --no-cache-dir
7677

7778
# WARs against security issues inherited from pytorch:25.06
7879
# * https://github.com/advisories/GHSA-8qvm-5x2c-j2w7

scripts/build_wheel.py

Lines changed: 146 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,10 @@
1616

1717
import os
1818
import platform
19+
import re
1920
import sys
2021
import sysconfig
22+
import tempfile
2123
import warnings
2224
from argparse import ArgumentParser
2325
from contextlib import contextmanager
@@ -27,7 +29,7 @@
2729
from shutil import copy, copytree, rmtree
2830
from subprocess import DEVNULL, CalledProcessError, check_output, run
2931
from textwrap import dedent
30-
from typing import List
32+
from typing import Sequence
3133

3234
try:
3335
from packaging.requirements import Requirement
@@ -120,7 +122,8 @@ def create_venv(project_dir: Path):
120122
return venv_prefix
121123

122124

123-
def setup_venv(project_dir: Path, requirements_file: Path, no_venv: bool):
125+
def setup_venv(project_dir: Path, requirements_file: Path,
126+
no_venv: bool) -> tuple[Path, Path]:
124127
"""Creates/updates a venv and installs requirements.
125128
126129
Args:
@@ -279,14 +282,147 @@ def generate_fmha_cu(project_dir, venv_python):
279282
os.chdir(project_dir)
280283

281284

285+
def create_cuda_stub_links(cuda_stub_dir: str, missing_libs: list[str]) -> str:
    """
    Creates symbolic links for CUDA stub libraries in a temporary directory.

    For every versioned name in ``missing_libs`` (e.g. ``libcuda.so.1``) the
    trailing version suffix is stripped to obtain the stub file name
    (``libcuda.so``). If that stub exists in ``cuda_stub_dir``, a symlink with
    the versioned name is created in a fresh temporary directory.

    Args:
        cuda_stub_dir (str): Path to the directory containing CUDA stubs.
        missing_libs: Versioned names of the missing libraries. Names without
            a trailing ``.<digits>`` suffix are ignored; duplicates are
            processed once.

    Returns:
        str: Path to the temporary directory where links were created. The
        caller is responsible for removing it.

    Raises:
        RuntimeError: If ``cuda_stub_dir`` does not exist, or if a symlink
            cannot be created (the temporary directory is removed first).
    """
    cuda_stub_path = Path(cuda_stub_dir)
    if not cuda_stub_path.exists():
        raise RuntimeError(
            f"CUDA stub directory '{cuda_stub_dir}' does not exist.")

    # Only a *trailing* run of ".<digits>" groups is a version suffix. An
    # unanchored pattern would also eat digits inside the base name, e.g.
    # "libpython3.10.so.1.0" -> "libpython3.so" instead of "libpython3.10.so".
    version_suffix = re.compile(r'(?:\.\d+)+$')

    # Create a temporary directory for the symbolic links
    temp_dir = tempfile.mkdtemp(prefix="cuda_stub_links_")
    temp_dir_path = Path(temp_dir)

    # dict.fromkeys() drops duplicates while keeping the original order, so a
    # repeated name cannot trigger FileExistsError on the second symlink.
    for missing_lib in dict.fromkeys(missing_libs):
        unversioned_name = version_suffix.sub('', missing_lib)
        if unversioned_name == missing_lib:
            # No version suffix: there is nothing to alias.
            continue

        so = cuda_stub_path / unversioned_name
        if not so.exists():
            warnings.warn(
                f"Warning: Source library '{so}' does not exist and was skipped."
            )
            continue

        try:
            # Link to the absolute location so the link stays valid even when
            # cuda_stub_dir was given relative to the current directory.
            (temp_dir_path / missing_lib).symlink_to(so.resolve())
        except OSError as e:
            # Clean up the temporary directory on error
            rmtree(temp_dir, ignore_errors=True)
            raise RuntimeError(
                f"Failed to create symbolic link for '{missing_lib}' in temporary directory '{temp_dir}': {e}"
            ) from e

    # Return the path to the temporary directory where the links were created
    return str(temp_dir_path)
330+
331+
332+
def check_missing_libs(so_prefix: str) -> list[str]:
    """
    Lists shared libraries that `ldd` reports as "not found".

    Args:
        so_prefix (str): Module name prefix; `ldd` is run on
            ``{so_prefix}.cpython*.so`` relative to the current directory.

    Returns:
        list[str]: Unique library names (first token of each "not found"
        line of the `ldd` output), in order of first appearance.
    """
    ldd_result = build_run(f"ldd {so_prefix}.cpython*.so",
                           capture_output=True,
                           text=True)
    # The library name is the first whitespace-separated token, i.e. the part
    # before "=> not found".
    unresolved = (entry.split()[0]
                  for entry in ldd_result.stdout.splitlines()
                  if "not found" in entry)
    # dict.fromkeys() removes repeats while preserving first-seen order.
    return list(dict.fromkeys(unresolved))
344+
345+
346+
def generate_python_stubs_linux(binding_type: str, venv_python: Path,
                                deep_ep: bool):
    """
    Generates Python stub files for the compiled extension modules on Linux.

    Installs the stub generators with pip, then runs them for `bindings`,
    `deep_gemm_cpp_tllm` and, optionally, `deep_ep_cpp_tllm`. When `ldd`
    reports unresolved libraries for the bindings module and the CUDA stub
    directory exists, versioned symlinks to the stubs are created in a
    temporary directory and put at the front of LD_LIBRARY_PATH for the stub
    generator processes only.

    Args:
        binding_type (str): "nanobind" selects nanobind.stubgen for the
            `bindings` module; any other value selects pybind11_stubgen.
        venv_python (Path): Python interpreter used for pip and stub generation.
        deep_ep (bool): Whether to also generate stubs for `deep_ep_cpp_tllm`.
    """
    use_nanobind = binding_type == "nanobind"
    if use_nanobind:
        build_run(f"\"{venv_python}\" -m pip install nanobind")
    # pybind11-stubgen is installed in every case: it is used for the
    # deep_gemm / deep_ep modules below.
    build_run(f"\"{venv_python}\" -m pip install pybind11-stubgen")

    stub_env = os.environ.copy()
    cuda_root = (stub_env.get("CUDA_HOME") or stub_env.get("CUDA_PATH")
                 or "/usr/local/cuda")
    unresolved_libs = check_missing_libs("bindings")
    stub_dir = f"{cuda_root}/lib64/stubs"

    link_dir = None
    if unresolved_libs and Path(stub_dir).exists():
        # Create symbolic links for the CUDA stubs
        link_dir = create_cuda_stub_links(stub_dir, unresolved_libs)
        search_dirs = [link_dir, stub_dir, stub_env.get("LD_LIBRARY_PATH")]
        stub_env["LD_LIBRARY_PATH"] = ":".join(
            entry for entry in search_dirs if entry)

    if use_nanobind:
        bindings_cmd = f"\"{venv_python}\" -m nanobind.stubgen -m bindings -O ."
    else:
        bindings_cmd = f"\"{venv_python}\" -m pybind11_stubgen -o . bindings --exit-code"

    stubgen_cmds = [
        bindings_cmd,
        f"\"{venv_python}\" -m pybind11_stubgen -o . deep_gemm_cpp_tllm --exit-code",
    ]
    if deep_ep:
        stubgen_cmds.append(
            f"\"{venv_python}\" -m pybind11_stubgen -o . deep_ep_cpp_tllm --exit-code"
        )

    try:
        for stubgen_cmd in stubgen_cmds:
            build_run(stubgen_cmd, env=stub_env)
    finally:
        # The symlink directory is only needed while the generators run.
        if link_dir:
            rmtree(link_dir)
386+
387+
388+
def generate_python_stubs_windows(binding_type: str, venv_python: Path,
                                  pkg_dir: Path, lib_dir: Path):
    """
    Generates Python stub files for the `bindings` module on Windows.

    Writes a small launcher script into ``pkg_dir`` that imports torch and
    tensorrt, registers ``lib_dir`` as a DLL directory and then calls
    pybind11_stubgen; runs it; and removes the script again, even when stub
    generation fails.

    Args:
        binding_type (str): Binding flavour. "nanobind" is not supported on
            Windows and terminates the process with exit status 1.
        venv_python (Path): Python interpreter used for pip and stub generation.
        pkg_dir (Path): Directory the launcher script is written to.
            NOTE(review): the script is invoked by its bare file name, so the
            current working directory is presumably ``pkg_dir`` - confirm
            against the caller.
        lib_dir (Path): Directory passed to ``os.add_dll_directory``.

    Raises:
        SystemExit: With code 1 when ``binding_type`` is "nanobind".
    """
    if binding_type == "nanobind":
        print("Windows not yet supported for nanobind stubs")
        # sys.exit() instead of the site-provided exit(), which is meant for
        # interactive use and is absent when Python runs with -S.
        sys.exit(1)

    build_run(f"\"{venv_python}\" -m pip install pybind11-stubgen")
    stubgen = "stubgen.py"
    stubgen_contents = """
        # Loading torch, trt before bindings is required to avoid import errors on windows.
        # isort: off
        import torch
        import tensorrt as trt
        # isort: on
        import os
        import platform

        from pybind11_stubgen import main

        if __name__ == "__main__":
            # Load dlls from `libs` directory before launching bindings.
            if platform.system() == "Windows":
                os.add_dll_directory(r\"{lib_dir}\")
            main()
        """.format(lib_dir=lib_dir)
    stubgen_path = pkg_dir / stubgen
    stubgen_path.write_text(dedent(stubgen_contents))
    try:
        build_run(f"\"{venv_python}\" {stubgen} -o . bindings")
    finally:
        # Do not leave the temporary launcher inside the package directory
        # when stub generation fails.
        stubgen_path.unlink()
416+
417+
282418
def main(*,
283419
build_type: str = "Release",
284420
generator: str = "",
285421
build_dir: Path = None,
286422
dist_dir: Path = None,
287423
cuda_architectures: str = None,
288424
job_count: int = None,
289-
extra_cmake_vars: List[str] = list(),
425+
extra_cmake_vars: Sequence[str] = tuple(),
290426
extra_make_targets: str = "",
291427
trt_root: str = '/usr/local/tensorrt',
292428
nccl_root: str = None,
@@ -361,7 +497,7 @@ def main(*,
361497

362498
if on_windows:
363499
# Windows does not support multi-device currently.
364-
extra_cmake_vars.extend(["ENABLE_MULTI_DEVICE=0"])
500+
extra_cmake_vars = list(extra_cmake_vars) + ["ENABLE_MULTI_DEVICE=0"]
365501

366502
# The Ninja CMake generator is used for our Windows build
367503
# (Easier than MSBuild to make compatible with our Docker image)
@@ -703,81 +839,14 @@ def get_binding_lib(subdirectory, name):
703839
dirs_exist_ok=True)
704840

705841
if not skip_stubs:
706-
with working_directory(project_dir):
707-
if binding_type == "nanobind":
708-
build_run(f"\"{venv_python}\" -m pip install nanobind")
709-
else:
710-
build_run(
711-
f"\"{venv_python}\" -m pip install pybind11-stubgen")
712842
with working_directory(pkg_dir):
713843
if on_windows:
714-
if binding_type == "nanobind":
715-
print("Windows not yet supported for nanobind stubs")
716-
exit(1)
717-
else:
718-
stubgen = "stubgen.py"
719-
stubgen_contents = """
720-
# Loading torch, trt before bindings is required to avoid import errors on windows.
721-
# isort: off
722-
import torch
723-
import tensorrt as trt
724-
# isort: on
725-
import os
726-
import platform
727-
728-
from pybind11_stubgen import main
729-
730-
if __name__ == "__main__":
731-
# Load dlls from `libs` directory before launching bindings.
732-
if platform.system() == "Windows":
733-
os.add_dll_directory(r\"{lib_dir}\")
734-
main()
735-
""".format(lib_dir=lib_dir)
736-
(pkg_dir / stubgen).write_text(dedent(stubgen_contents))
737-
build_run(f"\"{venv_python}\" {stubgen} -o . bindings")
738-
(pkg_dir / stubgen).unlink()
739-
else:
740-
env_ld = os.environ.copy()
741-
742-
new_library_path = "/usr/local/cuda/compat:/usr/local/cuda/compat/lib:/usr/local/cuda/compat/lib.real"
743-
if 'LD_LIBRARY_PATH' in env_ld:
744-
new_library_path += f":{env_ld['LD_LIBRARY_PATH']}"
745-
746-
result = build_run("find /usr -name *libnvidia-ml.so*",
747-
capture_output=True,
748-
text=True)
749-
assert result.returncode == 0, f"Failed to run find *libnvidia-ml.so*: {result.stderr}"
750-
751-
# Build containers only contain stub version of libnvidia-ml.so and not the real version.
752-
# If real version not in system, we need to create symbolic link to stub version to prevent import errors.
753-
if "libnvidia-ml.so.1" not in result.stdout:
754-
if "libnvidia-ml.so" in result.stdout:
755-
line = result.stdout.splitlines()[0]
756-
path = os.path.dirname(line)
757-
new_library_path += f":{path}"
758-
build_run(f"ln -s {line} {path}/libnvidia-ml.so.1")
759-
else:
760-
print(
761-
f"Failed to find libnvidia-ml.so: {result.stderr}",
762-
file=sys.stderr)
763-
exit(1)
764-
765-
env_ld["LD_LIBRARY_PATH"] = new_library_path
766-
if binding_type == "nanobind":
767-
build_run(
768-
f"\"{venv_python}\" -m nanobind.stubgen -m bindings -O .",
769-
env=env_ld)
770-
else:
771-
build_run(
772-
f"\"{venv_python}\" -m pybind11_stubgen -o . bindings --exit-code",
773-
env=env_ld)
774-
if deep_ep_cuda_architectures:
775-
build_run(
776-
f"\"{venv_python}\" -m pybind11_stubgen -o . deep_ep_cpp_tllm --exit-code",
777-
env=env_ld)
778-
build_run(
779-
f"\"{venv_python}\" -m pybind11_stubgen -o . deep_gemm_cpp_tllm --exit-code",
780-
env=env_ld)
844+
generate_python_stubs_windows(binding_type, venv_python,
845+
pkg_dir, lib_dir)
846+
else: # on linux
847+
generate_python_stubs_linux(
848+
binding_type, venv_python,
849+
bool(deep_ep_cuda_architectures))
781850

782851
if not skip_building_wheel:
783852
if dist_dir is None:

0 commit comments

Comments
 (0)