From f8852d16f37586b3e17b0c6a6f1140e3098bfb55 Mon Sep 17 00:00:00 2001 From: Francesco Petrini Date: Fri, 15 Mar 2024 14:52:11 -0700 Subject: [PATCH 01/10] Initial commit --- CMakeLists.txt | 7 + src/python/library/CMakeLists.txt | 49 +++++- src/python/library/build_wheel.py | 27 ++- src/python/library/setup.py | 4 +- .../library/tritonclient/utils/CMakeLists.txt | 27 +-- .../utils/shared_memory/__init__.py | 1 + .../utils/shared_memory/shared_memory.cc | 154 +++++++++++++++++- .../utils/shared_memory/shared_memory.h | 22 +-- .../shared_memory/shared_memory_handle.h | 16 ++ 9 files changed, 266 insertions(+), 41 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9db890fc5..5d0328bdf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -64,6 +64,13 @@ if(NOT TRITON_ENABLE_PYTHON_GRPC) set(TRITON_COMMON_ENABLE_PROTOBUF_PYTHON OFF) endif() +# FIXME: Windows client currently does not support GPU tensors. +# For simplicity, we will override this option here. +if(WIN32 AND TRITON_ENABLE_GPU) + message("GPU shared memory is not currently supported by the Windows client. Forcing TRITON_ENABLE_GPU to false.") + set(TRITON_ENABLE_GPU OFF CACHE BOOL "GPU disabled" FORCE) +endif() + # # Dependencies # diff --git a/src/python/library/CMakeLists.txt b/src/python/library/CMakeLists.txt index 87bfd4050..7fae4d003 100644 --- a/src/python/library/CMakeLists.txt +++ b/src/python/library/CMakeLists.txt @@ -38,9 +38,7 @@ if(${TRITON_ENABLE_PYTHON_HTTP}) file(COPY tritonhttpclient DESTINATION .) endif() # TRITON_ENABLE_PYTHON_HTTP file(COPY tritonclientutils DESTINATION .) -if (NOT WIN32) - file(COPY tritonshmutils DESTINATION .) -endif() # NOT WIN32 +file(COPY tritonshmutils DESTINATION .) #################################### file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/TRITON_VERSION ${TRITON_VERSION}) @@ -90,6 +88,35 @@ add_custom_target( "${wheel_stamp_file}" ) +# +# Windows-specific Wheel file. +# +if(WIN32) + set(WINDOWS_WHEEL_DEPENDS + cshm + ${WHEEL_DEPENDS} + ) + + if (${TRITON_ENABLE_PERF_ANALYZER}) + set(perf_analyzer_arg --perf-analyzer ${CMAKE_INSTALL_PREFIX}/bin/perf_analyzer) + endif() + set(windows_wheel_stamp_file "windows_stamp.whl") + add_custom_command( + OUTPUT "${windows_wheel_stamp_file}" + COMMAND python3 + ARGS + "${CMAKE_CURRENT_SOURCE_DIR}/build_wheel.py" + --dest-dir "${CMAKE_CURRENT_BINARY_DIR}/windows" + --windows + ${perf_analyzer_arg} + DEPENDS ${LINUX_WHEEL_DEPENDS} + ) + + add_custom_target( + windows-client-wheel ALL + DEPENDS + "${windows_wheel_stamp_file}" + ) # # Linux specific Wheel file. Compatible with x86, x64 and aarch64 # @@ -147,14 +174,20 @@ if(${TRITON_ENABLE_PYTHON_GRPC}) ) endif() # TRITON_ENABLE_PYTHON_GRPC -install( - CODE "file(GLOB _Wheel \"${CMAKE_CURRENT_BINARY_DIR}/generic/triton*.whl\")" - CODE "file(INSTALL \${_Wheel} DESTINATION \"${CMAKE_INSTALL_PREFIX}/python\")" -) -if (NOT WIN32) +if(WIN32) + install( + CODE "file(GLOB _Wheel \"${CMAKE_CURRENT_BINARY_DIR}/windows/triton*.whl\")" + CODE "file(INSTALL \${_Wheel} DESTINATION \"${CMAKE_INSTALL_PREFIX}/python\")" + ) +elseif(NOT WIN32) install( CODE "file(GLOB _Wheel \"${CMAKE_CURRENT_BINARY_DIR}/linux/triton*.whl\")" CODE "file(INSTALL \${_Wheel} DESTINATION \"${CMAKE_INSTALL_PREFIX}/python\")" ) +else() + install( + CODE "file(GLOB _Wheel \"${CMAKE_CURRENT_BINARY_DIR}/generic/triton*.whl\")" + CODE "file(INSTALL \${_Wheel} DESTINATION \"${CMAKE_INSTALL_PREFIX}/python\")" + ) endif() # NOT WIN32 diff --git a/src/python/library/build_wheel.py b/src/python/library/build_wheel.py index d32e7732a..b44b7ba29 100755 --- a/src/python/library/build_wheel.py +++ b/src/python/library/build_wheel.py @@ -28,6 +28,7 @@ import argparse import os import pathlib +import platform import re import shutil import subprocess @@ -83,6 +84,12 @@ def sed(pattern, replace, source, dest=None): required=False, help="Include linux specific artifacts.", ) + parser.add_argument( + "--windows", + action="store_true", + required=False, + help="Include windows specific artifacts.", + ) parser.add_argument( "--perf-analyzer", type=str, @@ -118,7 +125,7 @@ def sed(pattern, replace, source, dest=None): cpdir("tritonhttpclient", os.path.join(FLAGS.whl_dir, "tritonhttpclient")) if os.path.isdir("tritongrpcclient"): cpdir("tritongrpcclient", os.path.join(FLAGS.whl_dir, "tritongrpcclient")) - if FLAGS.linux: + if FLAGS.linux or FLAGS.windows: if os.path.isdir("tritonshmutils"): cpdir("tritonshmutils", os.path.join(FLAGS.whl_dir, "tritonshmutils")) @@ -194,6 +201,21 @@ def sed(pattern, replace, source, dest=None): if not os.path.exists(os.path.join(FLAGS.whl_dir, "perf_client")): os.symlink("perf_analyzer", os.path.join(FLAGS.whl_dir, "perf_client")) + if FLAGS.windows: + cpdir( + "tritonclient/utils/shared_memory", + os.path.join(FLAGS.whl_dir, "tritonclient/utils/shared_memory"), + ) + shutil.copyfile( + "tritonclient/utils/Release/cshm.dll", + os.path.join(FLAGS.whl_dir, "tritonclient/utils/shared_memory/cshm.dll"), + ) + # FIXME: Enable when Windows supports GPU tensors + # cpdir( + # "tritonclient/utils/cuda_shared_memory", + # os.path.join(FLAGS.whl_dir, "tritonclient/utils/cuda_shared_memory"), + # ) + shutil.copyfile("LICENSE.txt", os.path.join(FLAGS.whl_dir, "LICENSE.txt")) shutil.copyfile("setup.py", os.path.join(FLAGS.whl_dir, "setup.py")) cpdir("requirements", os.path.join(FLAGS.whl_dir, "requirements")) @@ -208,6 +230,9 @@ def sed(pattern, replace, source, dest=None): else: platform_name = "manylinux1_x86_64" args = ["python3", "setup.py", "bdist_wheel", "--plat-name", platform_name] + elif FLAGS.windows and platform.uname().machine == "AMD64": + platform_name = "win_amd64" + args = ["python3", "setup.py", "bdist_wheel", "--plat-name", platform_name] else: args = ["python3", "setup.py", "bdist_wheel"] diff --git a/src/python/library/setup.py b/src/python/library/setup.py index 58cddbecf..9c3d21ee7 100755 --- a/src/python/library/setup.py +++ b/src/python/library/setup.py @@ -76,8 +76,10 @@ def req_file(filename, folder="requirements"): extras_require["all"] = list(chain(extras_require.values())) platform_package_data = [] -if PLATFORM_FLAG != "any": +if "linux" in PLATFORM_FLAG: platform_package_data += ["libcshm.so"] +elif PLATFORM_FLAG == "win_amd64": + platform_package_data += ["cshm.dll"] data_files = [ ("", ["LICENSE.txt"]), diff --git a/src/python/library/tritonclient/utils/CMakeLists.txt b/src/python/library/tritonclient/utils/CMakeLists.txt index 7de1acf96..8a77f27e5 100644 --- a/src/python/library/tritonclient/utils/CMakeLists.txt +++ b/src/python/library/tritonclient/utils/CMakeLists.txt @@ -28,22 +28,23 @@ configure_file(__init__.py __init__.py COPYONLY) configure_file(_dlpack.py _dlpack.py COPYONLY) configure_file(_shared_memory_tensor.py _shared_memory_tensor.py COPYONLY) -if(NOT WIN32) - file(COPY shared_memory DESTINATION .) +file(COPY shared_memory DESTINATION .) +# +# libcshm.so / cshm.dll +# +add_library(cshm SHARED shared_memory/shared_memory.cc) +if(${TRITON_ENABLE_GPU}) + target_compile_definitions(cshm PUBLIC TRITON_ENABLE_GPU=1) + target_link_libraries(cshm PUBLIC CUDA::cudart) +endif() # TRITON_ENABLE_GPU - # - # libcshm.so - # - add_library(cshm SHARED shared_memory/shared_memory.cc) - if(${TRITON_ENABLE_GPU}) - target_compile_definitions(cshm PUBLIC TRITON_ENABLE_GPU=1) - target_link_libraries(cshm PUBLIC CUDA::cudart) - endif() # TRITON_ENABLE_GPU +if(NOT WIN32) target_link_libraries(cshm PRIVATE rt) endif() # WIN32 -if(NOT WIN32) - configure_file(shared_memory/__init__.py shared_memory/__init__.py COPYONLY) +configure_file(shared_memory/__init__.py shared_memory/__init__.py COPYONLY) + +if(${TRITON_ENABLE_GPU}) configure_file(cuda_shared_memory/__init__.py cuda_shared_memory/__init__.py COPYONLY) configure_file(cuda_shared_memory/_utils.py cuda_shared_memory/_utils.py COPYONLY) -endif() # NOT WIN32 +endif() # TRITON_ENABLE_GPU diff --git a/src/python/library/tritonclient/utils/shared_memory/__init__.py b/src/python/library/tritonclient/utils/shared_memory/__init__.py index 4fb245ea0..f3a2cb250 100755 --- a/src/python/library/tritonclient/utils/shared_memory/__init__.py +++ b/src/python/library/tritonclient/utils/shared_memory/__init__.py @@ -326,6 +326,7 @@ def __init__(self, err): -4: "unable to read/mmap the shared memory region", -5: "unable to unlink the shared memory region", -6: "unable to munmap the shared memory region", + -7: "unable to create file mapping", } self._msg = None if type(err) == str: diff --git a/src/python/library/tritonclient/utils/shared_memory/shared_memory.cc b/src/python/library/tritonclient/utils/shared_memory/shared_memory.cc index 2ccebb9d1..8c075b23a 100644 --- a/src/python/library/tritonclient/utils/shared_memory/shared_memory.cc +++ b/src/python/library/tritonclient/utils/shared_memory/shared_memory.cc @@ -23,34 +23,172 @@ // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "shared_memory.h" +#ifdef _WIN32 +#include +#else +#include +#include +#endif #include #include -#include -#include #include #include +#include "shared_memory.h" #include "shared_memory_handle.h" //============================================================================== // SharedMemoryControlContext +#ifdef _WIN32 +namespace { + +void* +SharedMemoryHandleCreate( + std::string triton_shm_name, void* shm_addr, std::string shm_key, + HANDLE shm_handle, size_t offset, size_t byte_size) +{ + SharedMemoryHandle* handle = new SharedMemoryHandle(); + handle->triton_shm_name_ = triton_shm_name; + handle->base_addr_ = shm_addr; + handle->shm_key_ = shm_key; + handle->shm_handle_ = shm_handle; + handle->offset_ = offset; + handle->byte_size_ = byte_size; + return reinterpret_cast(handle); +} + +int +SharedMemoryRegionMap( + HANDLE shm_handle, size_t offset, size_t byte_size, void** shm_addr) +{ + // The MapViewOfFile function takes a high-order and low-order DWORD (4 bytes + // each) for offset. 'size_t' can either be 4 or 8 bytes depending on the + // operating system. To handle both cases agnostically, we cast 'offset' to + // uint64 to ensure we have a known size and enough space to perform our + // logical operations. + uint64_t upperbound_offset = (uint64_t)offset; + DWORD high_order_offset = (upperbound_offset >> 32) & 0xFFFFFFFF; + DWORD low_order_offset = upperbound_offset & 0xFFFFFFFF; + // map shared memory to process address space + *shm_addr = MapViewOfFile( + shm_handle, // handle to map object + FILE_MAP_ALL_ACCESS, // read/write permission + high_order_offset, // offset (high-order DWORD) + low_order_offset, // offset (low-order DWORD) + byte_size); + + if (*shm_addr == NULL) { + CloseHandle(shm_handle); + return -1; + } + // For Windows, we cannot close the shared memory handle here. When all + // handles are closed, the system will free the section of the paging + // file the shared memory object uses. Instead, we close on error or when + // we are destroying the shared memory object. + return 0; +} + +} // namespace + +TRITONCLIENT_DECLSPEC int +SharedMemoryRegionCreate( + const char* triton_shm_name, const char* shm_key, size_t byte_size, + void** shm_handle) +{ + // The CreateFileMapping function takes a high-order and low-order DWORD (4 + // bytes each) for size. 'size_t' can either be 4 or 8 bytes depending on the + // operating system. To handle both cases agnostically, we cast 'byte_size' to + // uint64 to ensure we have a known size and enough space to perform our + // logical operations. + uint64_t upperbound_size = (uint64_t)byte_size; + DWORD high_order_size = (upperbound_size >> 32) & 0xFFFFFFFF; + DWORD low_order_size = upperbound_size & 0xFFFFFFFF; + + HANDLE local_handle = CreateFileMapping( + INVALID_HANDLE_VALUE, // use paging file + NULL, // default security + PAGE_READWRITE, // read/write access + high_order_size, // maximum object size (high-order DWORD) + low_order_size, // maximum object size (low-order DWORD) + shm_key); // name of mapping object + + if (local_handle == NULL) { + return -7; + } + + // get base address of shared memory region + void* shm_addr = nullptr; + int err = SharedMemoryRegionMap(local_handle, 0, byte_size, &shm_addr); + if (err == -1) { + return -4; + } + + // create a handle for the shared memory region + *shm_handle = SharedMemoryHandleCreate( + std::string(triton_shm_name), shm_addr, std::string(shm_key), + local_handle, 0, byte_size); + return 0; +} + +TRITONCLIENT_DECLSPEC int +SharedMemoryRegionSet( + void* shm_handle, size_t offset, size_t byte_size, const void* data) +{ + void* shm_addr = + reinterpret_cast(shm_handle)->base_addr_; + char* shm_addr_offset = reinterpret_cast(shm_addr); + std::memcpy(shm_addr_offset + offset, data, byte_size); + return 0; +} + +TRITONCLIENT_DECLSPEC int +GetSharedMemoryHandleInfo( + void* shm_handle, char** shm_addr, const char** shm_key, + SHM_FILE* shm_file_handle, size_t* offset, size_t* byte_size) +{ + SharedMemoryHandle* handle = + reinterpret_cast(shm_handle); + *shm_addr = reinterpret_cast(handle->base_addr_); + *shm_key = handle->shm_key_.c_str(); + *shm_file_handle = handle->shm_handle_; + *offset = handle->offset_; + *byte_size = handle->byte_size_; + return 0; +} + +TRITONCLIENT_DECLSPEC int +SharedMemoryRegionDestroy(void* shm_handle) +{ + SharedMemoryHandle* handle = + reinterpret_cast(shm_handle); + void* shm_addr = reinterpret_cast(handle->base_addr_); + bool success = UnmapViewOfFile(shm_addr); + if (!success) { + return -6; + } + // We keep Windows shared memory handles open until we are done + // using them. When all handles are closed, the system will free + // the section of the pagaing file that the object uses. + CloseHandle(handle->shm_handle_); + + return 0; +} +#else namespace { void* SharedMemoryHandleCreate( std::string triton_shm_name, void* shm_addr, std::string shm_key, - int shm_fd, size_t offset, size_t byte_size) + SHM_FILE shm_file, size_t offset, size_t byte_size) { SharedMemoryHandle* handle = new SharedMemoryHandle(); handle->triton_shm_name_ = triton_shm_name; handle->base_addr_ = shm_addr; handle->shm_key_ = shm_key; - handle->shm_fd_ = shm_fd; + handle->shm_file_ = shm_file; handle->offset_ = offset; handle->byte_size_ = byte_size; return reinterpret_cast(handle); @@ -116,7 +254,7 @@ SharedMemoryRegionSet( int GetSharedMemoryHandleInfo( - void* shm_handle, char** shm_addr, const char** shm_key, int* shm_fd, + void* shm_handle, char** shm_addr, const char** shm_key, SHM_FILE* shm_fd, size_t* offset, size_t* byte_size) { SharedMemoryHandle* handle = @@ -147,5 +285,5 @@ SharedMemoryRegionDestroy(void* shm_handle) return 0; } - +#endif //============================================================================== diff --git a/src/python/library/tritonclient/utils/shared_memory/shared_memory.h b/src/python/library/tritonclient/utils/shared_memory/shared_memory.h index 9d3e9519e..e41c7207f 100644 --- a/src/python/library/tritonclient/utils/shared_memory/shared_memory.h +++ b/src/python/library/tritonclient/utils/shared_memory/shared_memory.h @@ -25,26 +25,28 @@ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #pragma once -#include -#include -#include -#include - #ifdef __cplusplus extern "C" { #endif +#ifdef _WIN32 +#define TRITONCLIENT_DECLSPEC __declspec(dllexport) +typedef HANDLE SHM_FILE; +#else +define TRITONCLIENT_DECLSPEC typedef int SHM_FILE; +#endif + //============================================================================== // SharedMemoryControlContext -int SharedMemoryRegionCreate( +TRITONCLIENT_DECLSPEC int SharedMemoryRegionCreate( const char* triton_shm_name, const char* shm_key, size_t byte_size, void** shm_handle); -int SharedMemoryRegionSet( +TRITONCLIENT_DECLSPEC int SharedMemoryRegionSet( void* shm_handle, size_t offset, size_t byte_size, const void* data); -int GetSharedMemoryHandleInfo( - void* shm_handle, char** shm_addr, const char** shm_key, int* shm_fd, +TRITONCLIENT_DECLSPEC int GetSharedMemoryHandleInfo( + void* shm_handle, char** shm_addr, const char** shm_key, SHM_FILE* shm_file, size_t* offset, size_t* byte_size); -int SharedMemoryRegionDestroy(void* shm_handle); +TRITONCLIENT_DECLSPEC int SharedMemoryRegionDestroy(void* shm_handle); //============================================================================== diff --git a/src/python/library/tritonclient/utils/shared_memory/shared_memory_handle.h b/src/python/library/tritonclient/utils/shared_memory/shared_memory_handle.h index b929ed305..5833f5fae 100644 --- a/src/python/library/tritonclient/utils/shared_memory/shared_memory_handle.h +++ b/src/python/library/tritonclient/utils/shared_memory/shared_memory_handle.h @@ -30,6 +30,21 @@ #include #endif // TRITON_ENABLE_GPU +#ifdef _WIN32 +#include +struct SharedMemoryHandle { + std::string triton_shm_name_; + std::string shm_key_; +#ifdef TRITON_ENABLE_GPU + cudaIpcMemHandle_t cuda_shm_handle_; + int device_id_; +#endif // TRITON_ENABLE_GPU + void* base_addr_; + HANDLE shm_handle_; + size_t offset_; + size_t byte_size_; +}; +#else struct SharedMemoryHandle { std::string triton_shm_name_; std::string shm_key_; @@ -42,3 +57,4 @@ struct SharedMemoryHandle { size_t offset_; size_t byte_size_; }; +#endif From d4f9cee6ae09d29ea8146b78a8b0212b8758f42d Mon Sep 17 00:00:00 2001 From: Francesco Petrini Date: Fri, 15 Mar 2024 18:00:06 -0700 Subject: [PATCH 02/10] CMakeLists cleanup --- src/python/library/CMakeLists.txt | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/src/python/library/CMakeLists.txt b/src/python/library/CMakeLists.txt index 7fae4d003..5f2875ee3 100644 --- a/src/python/library/CMakeLists.txt +++ b/src/python/library/CMakeLists.txt @@ -96,7 +96,6 @@ if(WIN32) cshm ${WHEEL_DEPENDS} ) - if (${TRITON_ENABLE_PERF_ANALYZER}) set(perf_analyzer_arg --perf-analyzer ${CMAKE_INSTALL_PREFIX}/bin/perf_analyzer) endif() @@ -117,10 +116,10 @@ if(WIN32) DEPENDS "${windows_wheel_stamp_file}" ) -# -# Linux specific Wheel file. Compatible with x86, x64 and aarch64 -# -if (NOT WIN32) +else() + # + # Linux specific Wheel file. Compatible with x86, x64 and aarch64 + # # Can generate linux specific wheel file on linux systems only. set(LINUX_WHEEL_DEPENDS cshm @@ -147,7 +146,7 @@ if (NOT WIN32) DEPENDS "${linux_wheel_stamp_file}" ) -endif() # NOT WIN32 +endif() # WIN32 if(${TRITON_ENABLE_PYTHON_GRPC}) add_dependencies( @@ -155,12 +154,17 @@ if(${TRITON_ENABLE_PYTHON_GRPC}) grpc-service-py-library proto-py-library ) - if (NOT WIN32) + if (WIN32) + add_dependencies( + windows-client-wheel + grpc-service-py-library proto-py-library + ) + else() add_dependencies( linux-client-wheel grpc-service-py-library proto-py-library - ) - endif() # NOT WIN32 + ) + endif() # WIN32 file( GLOB generated-py @@ -180,14 +184,9 @@ if(WIN32) CODE "file(GLOB _Wheel \"${CMAKE_CURRENT_BINARY_DIR}/windows/triton*.whl\")" CODE "file(INSTALL \${_Wheel} DESTINATION \"${CMAKE_INSTALL_PREFIX}/python\")" ) -elseif(NOT WIN32) - install( - CODE "file(GLOB _Wheel \"${CMAKE_CURRENT_BINARY_DIR}/linux/triton*.whl\")" - CODE "file(INSTALL \${_Wheel} DESTINATION \"${CMAKE_INSTALL_PREFIX}/python\")" - ) else() install( - CODE "file(GLOB _Wheel \"${CMAKE_CURRENT_BINARY_DIR}/generic/triton*.whl\")" + CODE "file(GLOB _Wheel \"${CMAKE_CURRENT_BINARY_DIR}/linux/triton*.whl\")" CODE "file(INSTALL \${_Wheel} DESTINATION \"${CMAKE_INSTALL_PREFIX}/python\")" ) endif() # NOT WIN32 From b69868f8f420321ddbbc7773a60516fd78938223 Mon Sep 17 00:00:00 2001 From: Francesco Petrini Date: Tue, 19 Mar 2024 17:59:17 -0700 Subject: [PATCH 03/10] Fix typo --- src/python/library/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/python/library/CMakeLists.txt b/src/python/library/CMakeLists.txt index 5f2875ee3..f97c86da0 100644 --- a/src/python/library/CMakeLists.txt +++ b/src/python/library/CMakeLists.txt @@ -108,7 +108,7 @@ if(WIN32) --dest-dir "${CMAKE_CURRENT_BINARY_DIR}/windows" --windows ${perf_analyzer_arg} - DEPENDS ${LINUX_WHEEL_DEPENDS} + DEPENDS ${WINDOWS_WHEEL_DEPENDS} ) add_custom_target( From d28a7808659d63831eda598133cf2408a7f207bb Mon Sep 17 00:00:00 2001 From: Francesco Petrini Date: Wed, 20 Mar 2024 10:25:42 -0700 Subject: [PATCH 04/10] Use generic wheel --- src/python/library/build_wheel.py | 7 ++++--- src/python/library/setup.py | 5 ++++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/python/library/build_wheel.py b/src/python/library/build_wheel.py index b44b7ba29..1668bca29 100755 --- a/src/python/library/build_wheel.py +++ b/src/python/library/build_wheel.py @@ -230,9 +230,10 @@ def sed(pattern, replace, source, dest=None): else: platform_name = "manylinux1_x86_64" args = ["python3", "setup.py", "bdist_wheel", "--plat-name", platform_name] - elif FLAGS.windows and platform.uname().machine == "AMD64": - platform_name = "win_amd64" - args = ["python3", "setup.py", "bdist_wheel", "--plat-name", platform_name] + # FIXME: Uncomment when Windows tests do not use WSL + # elif FLAGS.windows and platform.uname().machine == "AMD64": + # platform_name = "win_amd64" + # args = ["python3", "setup.py", "bdist_wheel", "--plat-name", platform_name] else: args = ["python3", "setup.py", "bdist_wheel"] diff --git a/src/python/library/setup.py b/src/python/library/setup.py index 9c3d21ee7..6365d7cd0 100755 --- a/src/python/library/setup.py +++ b/src/python/library/setup.py @@ -78,7 +78,10 @@ def req_file(filename, folder="requirements"): platform_package_data = [] if "linux" in PLATFORM_FLAG: platform_package_data += ["libcshm.so"] -elif PLATFORM_FLAG == "win_amd64": +# FIXME: Uncomment when Windows tests do not use WSL +# elif PLATFORM_FLAG == "win_amd64": +# platform_package_data += ["cshm.dll"] +else: platform_package_data += ["cshm.dll"] data_files = [ From 875331f387a867c7f12dc3c3aa115376b144b440 Mon Sep 17 00:00:00 2001 From: Francesco Petrini Date: Wed, 27 Mar 2024 11:07:27 -0700 Subject: [PATCH 05/10] Revert to windows-wheel and moved GPU override --- CMakeLists.txt | 7 ------- src/python/CMakeLists.txt | 7 +++++++ src/python/library/build_wheel.py | 7 +++---- src/python/library/setup.py | 5 +---- 4 files changed, 11 insertions(+), 15 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5d0328bdf..9db890fc5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -64,13 +64,6 @@ if(NOT TRITON_ENABLE_PYTHON_GRPC) set(TRITON_COMMON_ENABLE_PROTOBUF_PYTHON OFF) endif() -# FIXME: Windows client currently does not support GPU tensors. -# For simplicity, we will override this option here. -if(WIN32 AND TRITON_ENABLE_GPU) - message("GPU shared memory is not currently supported by the Windows client. Forcing TRITON_ENABLE_GPU to false.") - set(TRITON_ENABLE_GPU OFF CACHE BOOL "GPU disabled" FORCE) -endif() - # # Dependencies # diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt index 3725789e0..c5d34c210 100644 --- a/src/python/CMakeLists.txt +++ b/src/python/CMakeLists.txt @@ -45,6 +45,13 @@ if(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE Release) endif() +# FIXME: Windows client currently does not support GPU tensors. +# For simplicity, we will override this option here. +if(WIN32 AND TRITON_ENABLE_GPU) + message("GPU shared memory is not currently supported by the Windows python client. Forcing TRITON_ENABLE_GPU to false.") + set(TRITON_ENABLE_GPU OFF CACHE BOOL "GPU disabled" FORCE) +endif() + # # Dependencies # diff --git a/src/python/library/build_wheel.py b/src/python/library/build_wheel.py index 1668bca29..b44b7ba29 100755 --- a/src/python/library/build_wheel.py +++ b/src/python/library/build_wheel.py @@ -230,10 +230,9 @@ def sed(pattern, replace, source, dest=None): else: platform_name = "manylinux1_x86_64" args = ["python3", "setup.py", "bdist_wheel", "--plat-name", platform_name] - # FIXME: Uncomment when Windows tests do not use WSL - # elif FLAGS.windows and platform.uname().machine == "AMD64": - # platform_name = "win_amd64" - # args = ["python3", "setup.py", "bdist_wheel", "--plat-name", platform_name] + elif FLAGS.windows and platform.uname().machine == "AMD64": + platform_name = "win_amd64" + args = ["python3", "setup.py", "bdist_wheel", "--plat-name", platform_name] else: args = ["python3", "setup.py", "bdist_wheel"] diff --git a/src/python/library/setup.py b/src/python/library/setup.py index 6365d7cd0..9c3d21ee7 100755 --- a/src/python/library/setup.py +++ b/src/python/library/setup.py @@ -78,10 +78,7 @@ def req_file(filename, folder="requirements"): platform_package_data = [] if "linux" in PLATFORM_FLAG: platform_package_data += ["libcshm.so"] -# FIXME: Uncomment when Windows tests do not use WSL -# elif PLATFORM_FLAG == "win_amd64": -# platform_package_data += ["cshm.dll"] -else: +elif PLATFORM_FLAG == "win_amd64": platform_package_data += ["cshm.dll"] data_files = [ From dcefeacabf9d6da6f035f043a1dae26b843f3922 Mon Sep 17 00:00:00 2001 From: Francesco Petrini Date: Thu, 4 Apr 2024 13:00:19 -0700 Subject: [PATCH 06/10] Merge Win/Unix functions. Opaque shm file handling --- src/python/CMakeLists.txt | 9 +- src/python/library/CMakeLists.txt | 20 +- src/python/library/build_wheel.py | 9 +- src/python/library/setup.py | 2 +- .../library/tritonclient/utils/CMakeLists.txt | 11 +- .../utils/shared_memory/__init__.py | 19 +- .../utils/shared_memory/shared_memory.cc | 173 ++++++------------ .../utils/shared_memory/shared_memory.h | 7 +- .../shared_memory/shared_memory_handle.h | 32 ++-- 9 files changed, 112 insertions(+), 170 deletions(-) diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt index c5d34c210..2d68fd0b7 100644 --- a/src/python/CMakeLists.txt +++ b/src/python/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions @@ -45,13 +45,6 @@ if(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE Release) endif() -# FIXME: Windows client currently does not support GPU tensors. -# For simplicity, we will override this option here. -if(WIN32 AND TRITON_ENABLE_GPU) - message("GPU shared memory is not currently supported by the Windows python client. Forcing TRITON_ENABLE_GPU to false.") - set(TRITON_ENABLE_GPU OFF CACHE BOOL "GPU disabled" FORCE) -endif() - # # Dependencies # diff --git a/src/python/library/CMakeLists.txt b/src/python/library/CMakeLists.txt index f97c86da0..95ed6fb4f 100644 --- a/src/python/library/CMakeLists.txt +++ b/src/python/library/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions @@ -180,13 +180,13 @@ endif() # TRITON_ENABLE_PYTHON_GRPC if(WIN32) - install( - CODE "file(GLOB _Wheel \"${CMAKE_CURRENT_BINARY_DIR}/windows/triton*.whl\")" - CODE "file(INSTALL \${_Wheel} DESTINATION \"${CMAKE_INSTALL_PREFIX}/python\")" - ) + set(WHEEL_DIR "${CMAKE_CURRENT_BINARY_DIR}/windows") else() - install( - CODE "file(GLOB _Wheel \"${CMAKE_CURRENT_BINARY_DIR}/linux/triton*.whl\")" - CODE "file(INSTALL \${_Wheel} DESTINATION \"${CMAKE_INSTALL_PREFIX}/python\")" - ) -endif() # NOT WIN32 + set(WHEEL_DIR "${CMAKE_CURRENT_BINARY_DIR}/linux") +endif() # WIN32 + +install( + CODE "file(GLOB _Wheel \"${WHEEL_DIR}/triton*.whl\")" + CODE "file(INSTALL \${_Wheel} DESTINATION \"${CMAKE_INSTALL_PREFIX}/python\")" +) + diff --git a/src/python/library/build_wheel.py b/src/python/library/build_wheel.py index b44b7ba29..02e28e22f 100755 --- a/src/python/library/build_wheel.py +++ b/src/python/library/build_wheel.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions @@ -78,13 +78,14 @@ def sed(pattern, replace, source, dest=None): parser.add_argument( "--dest-dir", type=str, required=True, help="Destination directory." ) - parser.add_argument( + platform_group = parser.add_mutually_exclusive_group() + platform_group.add_argument( "--linux", action="store_true", required=False, help="Include linux specific artifacts.", ) - parser.add_argument( + platform_group.add_argument( "--windows", action="store_true", required=False, @@ -210,7 +211,7 @@ def sed(pattern, replace, source, dest=None): "tritonclient/utils/Release/cshm.dll", os.path.join(FLAGS.whl_dir, "tritonclient/utils/shared_memory/cshm.dll"), ) - # FIXME: Enable when Windows supports GPU tensors + # FIXME: Enable when Windows supports GPU tensors DLIS-4169 # cpdir( # "tritonclient/utils/cuda_shared_memory", # os.path.join(FLAGS.whl_dir, "tritonclient/utils/cuda_shared_memory"), diff --git a/src/python/library/setup.py b/src/python/library/setup.py index 9c3d21ee7..63f5dc41c 100755 --- a/src/python/library/setup.py +++ b/src/python/library/setup.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions diff --git a/src/python/library/tritonclient/utils/CMakeLists.txt b/src/python/library/tritonclient/utils/CMakeLists.txt index 8a77f27e5..5e2d96225 100644 --- a/src/python/library/tritonclient/utils/CMakeLists.txt +++ b/src/python/library/tritonclient/utils/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions @@ -24,6 +24,13 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# FIXME: Windows client currently does not support GPU tensors. +# For simplicity, we will override this option here. +if(WIN32 AND TRITON_ENABLE_GPU) + message(FATAL_ERROR "GPU shared memory is not currently supported by the Windows python client.") + set(TRITON_ENABLE_GPU OFF CACHE BOOL "GPU disabled" FORCE) +endif() + configure_file(__init__.py __init__.py COPYONLY) configure_file(_dlpack.py _dlpack.py COPYONLY) configure_file(_shared_memory_tensor.py _shared_memory_tensor.py COPYONLY) @@ -40,7 +47,7 @@ endif() # TRITON_ENABLE_GPU if(NOT WIN32) target_link_libraries(cshm PRIVATE rt) -endif() # WIN32 +endif() # NOT WIN32 configure_file(shared_memory/__init__.py shared_memory/__init__.py COPYONLY) diff --git a/src/python/library/tritonclient/utils/shared_memory/__init__.py b/src/python/library/tritonclient/utils/shared_memory/__init__.py index f3a2cb250..e65881afb 100755 --- a/src/python/library/tritonclient/utils/shared_memory/__init__.py +++ b/src/python/library/tritonclient/utils/shared_memory/__init__.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -# Copyright 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright 2019-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions @@ -63,7 +63,7 @@ def from_param(cls, value): c_void_p, POINTER(c_char_p), POINTER(c_char_p), - POINTER(c_int), + POINTER(c_void_p), POINTER(c_uint64), POINTER(c_uint64), ] @@ -205,7 +205,7 @@ def get_contents_as_numpy(shm_handle, datatype, shape, offset=0): The numpy array generated using the contents of the specified shared memory region. """ - shm_fd = c_int() + shm_file = c_void_p() region_offset = c_uint64() byte_size = c_uint64() shm_addr = c_char_p() @@ -216,7 +216,7 @@ def get_contents_as_numpy(shm_handle, datatype, shape, offset=0): shm_handle, byref(shm_addr), byref(shm_key), - byref(shm_fd), + byref(shm_file), byref(region_offset), byref(byte_size), ) @@ -285,9 +285,7 @@ def destroy_shared_memory_region(shm_handle): If unable to unlink the shared memory region. """ - _raise_if_error(c_int(_cshm_shared_memory_region_destroy(shm_handle))) - - shm_fd = c_int() + shm_file = c_void_p() offset = c_uint64() byte_size = c_uint64() shm_addr = c_char_p() @@ -298,13 +296,16 @@ def destroy_shared_memory_region(shm_handle): shm_handle, byref(shm_addr), byref(shm_key), - byref(shm_fd), + byref(shm_file), byref(offset), byref(byte_size), ) ) ) - mapped_shm_regions.remove(shm_key.value.decode("utf-8")) + shm_key_copy = bytes(shm_key.value) + _raise_if_error(c_int(_cshm_shared_memory_region_destroy(shm_handle))) + + mapped_shm_regions.remove(shm_key_copy.decode("utf-8")) return diff --git a/src/python/library/tritonclient/utils/shared_memory/shared_memory.cc b/src/python/library/tritonclient/utils/shared_memory/shared_memory.cc index 8c075b23a..267aa8931 100644 --- a/src/python/library/tritonclient/utils/shared_memory/shared_memory.cc +++ b/src/python/library/tritonclient/utils/shared_memory/shared_memory.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2019-2024, NVIDIA CORPORATION. All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions @@ -41,29 +41,29 @@ //============================================================================== // SharedMemoryControlContext - -#ifdef _WIN32 namespace { void* SharedMemoryHandleCreate( std::string triton_shm_name, void* shm_addr, std::string shm_key, - HANDLE shm_handle, size_t offset, size_t byte_size) + void* shm_file, size_t offset, size_t byte_size) { SharedMemoryHandle* handle = new SharedMemoryHandle(); handle->triton_shm_name_ = triton_shm_name; handle->base_addr_ = shm_addr; handle->shm_key_ = shm_key; - handle->shm_handle_ = shm_handle; + handle->platform_handle_ = new ShmFile(shm_file); handle->offset_ = offset; handle->byte_size_ = byte_size; - return reinterpret_cast(handle); + return static_cast(handle); } int SharedMemoryRegionMap( - HANDLE shm_handle, size_t offset, size_t byte_size, void** shm_addr) + void* shm_file, size_t offset, size_t byte_size, void** shm_addr) { +#ifdef _WIN32 + HANDLE file_handle = static_cast(shm_file); // The MapViewOfFile function takes a high-order and low-order DWORD (4 bytes // each) for offset. 'size_t' can either be 4 or 8 bytes depending on the // operating system. To handle both cases agnostically, we cast 'offset' to @@ -74,14 +74,14 @@ SharedMemoryRegionMap( DWORD low_order_offset = upperbound_offset & 0xFFFFFFFF; // map shared memory to process address space *shm_addr = MapViewOfFile( - shm_handle, // handle to map object + file_handle, // handle to map object FILE_MAP_ALL_ACCESS, // read/write permission high_order_offset, // offset (high-order DWORD) low_order_offset, // offset (low-order DWORD) byte_size); if (*shm_addr == NULL) { - CloseHandle(shm_handle); + CloseHandle(file_handle); return -1; } // For Windows, we cannot close the shared memory handle here. When all @@ -89,6 +89,17 @@ SharedMemoryRegionMap( // file the shared memory object uses. Instead, we close on error or when // we are destroying the shared memory object. return 0; +#else + int fd = *static_cast(shm_file); + // map shared memory to process address space + *shm_addr = mmap(NULL, byte_size, PROT_WRITE, MAP_SHARED, fd, offset); + if (*shm_addr == MAP_FAILED) { + return -1; + } + + // close shared memory descriptor, return 0 if success else return -1 + return close(fd); +#endif } } // namespace @@ -98,6 +109,7 @@ SharedMemoryRegionCreate( const char* triton_shm_name, const char* shm_key, size_t byte_size, void** shm_handle) { +#ifdef _WIN32 // The CreateFileMapping function takes a high-order and low-order DWORD (4 // bytes each) for size. 'size_t' can either be 4 or 8 bytes depending on the // operating system. To handle both cases agnostically, we cast 'byte_size' to @@ -121,7 +133,7 @@ SharedMemoryRegionCreate( // get base address of shared memory region void* shm_addr = nullptr; - int err = SharedMemoryRegionMap(local_handle, 0, byte_size, &shm_addr); + int err = SharedMemoryRegionMap((void*)local_handle, 0, byte_size, &shm_addr); if (err == -1) { return -4; } @@ -129,92 +141,8 @@ SharedMemoryRegionCreate( // create a handle for the shared memory region *shm_handle = SharedMemoryHandleCreate( std::string(triton_shm_name), shm_addr, std::string(shm_key), - local_handle, 0, byte_size); - return 0; -} - -TRITONCLIENT_DECLSPEC int -SharedMemoryRegionSet( - void* shm_handle, size_t offset, size_t byte_size, const void* data) -{ - void* shm_addr = - reinterpret_cast(shm_handle)->base_addr_; - char* shm_addr_offset = reinterpret_cast(shm_addr); - std::memcpy(shm_addr_offset + offset, data, byte_size); - return 0; -} - -TRITONCLIENT_DECLSPEC int -GetSharedMemoryHandleInfo( - void* shm_handle, char** shm_addr, const char** shm_key, - SHM_FILE* shm_file_handle, size_t* offset, size_t* byte_size) -{ - SharedMemoryHandle* handle = - reinterpret_cast(shm_handle); - *shm_addr = reinterpret_cast(handle->base_addr_); - *shm_key = handle->shm_key_.c_str(); - *shm_file_handle = handle->shm_handle_; - *offset = handle->offset_; - *byte_size = handle->byte_size_; - return 0; -} - -TRITONCLIENT_DECLSPEC int -SharedMemoryRegionDestroy(void* shm_handle) -{ - SharedMemoryHandle* handle = - reinterpret_cast(shm_handle); - void* shm_addr = reinterpret_cast(handle->base_addr_); - bool success = UnmapViewOfFile(shm_addr); - if (!success) { - return -6; - } - // We keep Windows shared memory handles open until we are done - // using them. When all handles are closed, the system will free - // the section of the pagaing file that the object uses. - CloseHandle(handle->shm_handle_); - - return 0; -} + (void*)local_handle, 0, byte_size); #else -namespace { - -void* -SharedMemoryHandleCreate( - std::string triton_shm_name, void* shm_addr, std::string shm_key, - SHM_FILE shm_file, size_t offset, size_t byte_size) -{ - SharedMemoryHandle* handle = new SharedMemoryHandle(); - handle->triton_shm_name_ = triton_shm_name; - handle->base_addr_ = shm_addr; - handle->shm_key_ = shm_key; - handle->shm_file_ = shm_file; - handle->offset_ = offset; - handle->byte_size_ = byte_size; - return reinterpret_cast(handle); -} - -int -SharedMemoryRegionMap( - int shm_fd, size_t offset, size_t byte_size, void** shm_addr) -{ - // map shared memory to process address space - *shm_addr = mmap(NULL, byte_size, PROT_WRITE, MAP_SHARED, shm_fd, offset); - if (*shm_addr == MAP_FAILED) { - return -1; - } - - // close shared memory descriptor, return 0 if success else return -1 - return close(shm_fd); -} - -} // namespace - -int -SharedMemoryRegionCreate( - const char* triton_shm_name, const char* shm_key, size_t byte_size, - void** shm_handle) -{ // get shared memory region descriptor int shm_fd = shm_open(shm_key, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); if (shm_fd == -1) { @@ -229,50 +157,64 @@ SharedMemoryRegionCreate( // get base address of shared memory region void* shm_addr = nullptr; - int err = SharedMemoryRegionMap(shm_fd, 0, byte_size, &shm_addr); + int err = SharedMemoryRegionMap((void*)&shm_fd, 0, byte_size, &shm_addr); if (err == -1) { return -4; } // create a handle for the shared memory region *shm_handle = SharedMemoryHandleCreate( - std::string(triton_shm_name), shm_addr, std::string(shm_key), shm_fd, 0, - byte_size); + std::string(triton_shm_name), shm_addr, std::string(shm_key), + (void*)&shm_fd, 0, byte_size); +#endif return 0; } -int +TRITONCLIENT_DECLSPEC int SharedMemoryRegionSet( void* shm_handle, size_t offset, size_t byte_size, const void* data) { - void* shm_addr = - reinterpret_cast(shm_handle)->base_addr_; - char* shm_addr_offset = reinterpret_cast(shm_addr); + void* shm_addr = static_cast(shm_handle)->base_addr_; + char* shm_addr_offset = static_cast(shm_addr); std::memcpy(shm_addr_offset + offset, data, byte_size); return 0; } -int +TRITONCLIENT_DECLSPEC int GetSharedMemoryHandleInfo( - void* shm_handle, char** shm_addr, const char** shm_key, SHM_FILE* shm_fd, + void* shm_handle, char** shm_addr, const char** shm_key, void** shm_file, size_t* offset, size_t* byte_size) { - SharedMemoryHandle* handle = - reinterpret_cast(shm_handle); - *shm_addr = reinterpret_cast(handle->base_addr_); +#ifdef _WIN32 + HANDLE* file = static_cast(shm_file); +#else + int* file = *static_cast(shm_file); +#endif // _WIN32 + SharedMemoryHandle* handle = static_cast(shm_handle); + *shm_addr = static_cast(handle->base_addr_); *shm_key = handle->shm_key_.c_str(); - *shm_fd = handle->shm_fd_; + *file = handle->platform_handle_->shm_file_; *offset = handle->offset_; *byte_size = handle->byte_size_; return 0; } -int +TRITONCLIENT_DECLSPEC int SharedMemoryRegionDestroy(void* shm_handle) { - SharedMemoryHandle* handle = - reinterpret_cast(shm_handle); - void* shm_addr = reinterpret_cast(handle->base_addr_); + SharedMemoryHandle* handle = static_cast(shm_handle); + void* shm_addr = static_cast(handle->base_addr_); + +#ifdef _WIN32 + bool success = UnmapViewOfFile(shm_addr); + if (!success) { + return -6; + } + // We keep Windows shared memory handles open until we are done + // using them. When all handles are closed, the system will free + // the section of the paging file that the object uses. + CloseHandle(handle->platform_handle_->shm_file_); +#else int status = munmap(shm_addr, handle->byte_size_); if (status == -1) { return -6; @@ -282,8 +224,13 @@ SharedMemoryRegionDestroy(void* shm_handle) if (shm_fd == -1) { return -5; } +#endif // _WIN32 + + // FIXME: Investigate use of smart pointers for these + // allocations instead + delete handle->platform_handle_; + delete handle; return 0; } -#endif //============================================================================== diff --git a/src/python/library/tritonclient/utils/shared_memory/shared_memory.h b/src/python/library/tritonclient/utils/shared_memory/shared_memory.h index e41c7207f..6c1b845d7 100644 --- a/src/python/library/tritonclient/utils/shared_memory/shared_memory.h +++ b/src/python/library/tritonclient/utils/shared_memory/shared_memory.h @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2019-2024, NVIDIA CORPORATION. All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions @@ -31,9 +31,8 @@ extern "C" { #ifdef _WIN32 #define TRITONCLIENT_DECLSPEC __declspec(dllexport) -typedef HANDLE SHM_FILE; #else -define TRITONCLIENT_DECLSPEC typedef int SHM_FILE; +define TRITONCLIENT_DECLSPEC #endif //============================================================================== @@ -44,7 +43,7 @@ TRITONCLIENT_DECLSPEC int SharedMemoryRegionCreate( TRITONCLIENT_DECLSPEC int SharedMemoryRegionSet( void* shm_handle, size_t offset, size_t byte_size, const void* data); TRITONCLIENT_DECLSPEC int GetSharedMemoryHandleInfo( - void* shm_handle, char** shm_addr, const char** shm_key, SHM_FILE* shm_file, + void* shm_handle, char** shm_addr, const char** shm_key, void** shm_file, size_t* offset, size_t* byte_size); TRITONCLIENT_DECLSPEC int SharedMemoryRegionDestroy(void* shm_handle); diff --git a/src/python/library/tritonclient/utils/shared_memory/shared_memory_handle.h b/src/python/library/tritonclient/utils/shared_memory/shared_memory_handle.h index 5833f5fae..28dce0759 100644 --- a/src/python/library/tritonclient/utils/shared_memory/shared_memory_handle.h +++ b/src/python/library/tritonclient/utils/shared_memory/shared_memory_handle.h @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2019-2024, NVIDIA CORPORATION. All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions @@ -32,29 +32,23 @@ #ifdef _WIN32 #include -struct SharedMemoryHandle { - std::string triton_shm_name_; - std::string shm_key_; -#ifdef TRITON_ENABLE_GPU - cudaIpcMemHandle_t cuda_shm_handle_; - int device_id_; -#endif // TRITON_ENABLE_GPU - void* base_addr_; - HANDLE shm_handle_; - size_t offset_; - size_t byte_size_; -}; +#endif // _WIN32 + +struct ShmFile { +#ifdef _WIN32 + HANDLE shm_file_; + ShmFile(void* shm_file) { shm_file_ = static_cast(shm_file); }; #else + int shm_file_; + ShmFile(int shm_file) { shm_file_ = *static_cast(shm_file); }; +#endif // _WIN32 +}; + struct SharedMemoryHandle { std::string triton_shm_name_; std::string shm_key_; -#ifdef TRITON_ENABLE_GPU - cudaIpcMemHandle_t cuda_shm_handle_; - int device_id_; -#endif // TRITON_ENABLE_GPU void* base_addr_; - int shm_fd_; + ShmFile* platform_handle_; size_t offset_; size_t byte_size_; }; -#endif From caa2ed13d1e3e30245b9098c21df0e8a0ff20f9a Mon Sep 17 00:00:00 2001 From: Francesco Petrini Date: Thu, 4 Apr 2024 13:02:32 -0700 Subject: [PATCH 07/10] Revert Cmake copyright --- src/python/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt index 2d68fd0b7..3725789e0 100644 --- a/src/python/CMakeLists.txt +++ b/src/python/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions From 191be4cba5922170ce03af15b344d277996c34f8 Mon Sep 17 00:00:00 2001 From: fpetrini15 Date: Fri, 5 Apr 2024 19:41:46 -0700 Subject: [PATCH 08/10] Fix ctypes, don't close fd, use smart pointers --- src/python/library/CMakeLists.txt | 13 +++++++++- src/python/library/build_wheel.py | 24 ++++++++++++------ .../utils/shared_memory/__init__.py | 11 +++++--- .../utils/shared_memory/shared_memory.cc | 25 +++++++++---------- .../utils/shared_memory/shared_memory.h | 2 +- .../shared_memory/shared_memory_handle.h | 12 ++++++--- 6 files changed, 58 insertions(+), 29 deletions(-) diff --git a/src/python/library/CMakeLists.txt b/src/python/library/CMakeLists.txt index 95ed6fb4f..dd9becb48 100644 --- a/src/python/library/CMakeLists.txt +++ b/src/python/library/CMakeLists.txt @@ -128,7 +128,10 @@ else() if (${TRITON_ENABLE_PERF_ANALYZER}) set(perf_analyzer_arg --perf-analyzer ${CMAKE_INSTALL_PREFIX}/bin/perf_analyzer) - endif() + endif() # TRITON_ENABLE_PERF_ANALYZER + if (${TRITON_ENABLE_GPU}) + set(gpu_arg --include-gpu-libs) + endif() # TRITON_ENABLE_GPU set(linux_wheel_stamp_file "linux_stamp.whl") add_custom_command( OUTPUT "${linux_wheel_stamp_file}" @@ -138,6 +141,7 @@ else() --dest-dir "${CMAKE_CURRENT_BINARY_DIR}/linux" --linux ${perf_analyzer_arg} + ${gpu_arg} DEPENDS ${LINUX_WHEEL_DEPENDS} ) @@ -178,7 +182,14 @@ if(${TRITON_ENABLE_PYTHON_GRPC}) ) endif() # TRITON_ENABLE_PYTHON_GRPC +# Generic Wheel +set(WHEEL_DIR "${CMAKE_CURRENT_BINARY_DIR}/generic") +install( + CODE "file(GLOB _Wheel \"${WHEEL_DIR}/triton*.whl\")" + CODE "file(INSTALL \${_Wheel} DESTINATION \"${CMAKE_INSTALL_PREFIX}/python\")" +) +# Platform-specific wheels if(WIN32) set(WHEEL_DIR "${CMAKE_CURRENT_BINARY_DIR}/windows") else() diff --git a/src/python/library/build_wheel.py b/src/python/library/build_wheel.py index 02e28e22f..b9b768a91 100755 --- a/src/python/library/build_wheel.py +++ b/src/python/library/build_wheel.py @@ -91,6 +91,12 @@ def sed(pattern, replace, source, dest=None): required=False, help="Include windows specific artifacts.", ) + parser.add_argument( + "--include-gpu-libs", + action="store_true", + required=False, + help="Include gpu specific libraries", + ) parser.add_argument( "--perf-analyzer", type=str, @@ -186,10 +192,11 @@ def sed(pattern, replace, source, dest=None): "tritonclient/utils/libcshm.so", os.path.join(FLAGS.whl_dir, "tritonclient/utils/shared_memory/libcshm.so"), ) - cpdir( - "tritonclient/utils/cuda_shared_memory", - os.path.join(FLAGS.whl_dir, "tritonclient/utils/cuda_shared_memory"), - ) + if FLAGS.include_gpu_libs: + cpdir( + "tritonclient/utils/cuda_shared_memory", + os.path.join(FLAGS.whl_dir, "tritonclient/utils/cuda_shared_memory"), + ) # Copy the pre-compiled perf_analyzer binary if FLAGS.perf_analyzer is not None: @@ -212,10 +219,11 @@ def sed(pattern, replace, source, dest=None): os.path.join(FLAGS.whl_dir, "tritonclient/utils/shared_memory/cshm.dll"), ) # FIXME: Enable when Windows supports GPU tensors DLIS-4169 - # cpdir( - # "tritonclient/utils/cuda_shared_memory", - # os.path.join(FLAGS.whl_dir, "tritonclient/utils/cuda_shared_memory"), - # ) + # if FLAGS.include_gpu_libs: + # cpdir( + # "tritonclient/utils/cuda_shared_memory", + # os.path.join(FLAGS.whl_dir, "tritonclient/utils/cuda_shared_memory"), + # ) shutil.copyfile("LICENSE.txt", os.path.join(FLAGS.whl_dir, "LICENSE.txt")) shutil.copyfile("setup.py", os.path.join(FLAGS.whl_dir, "setup.py")) diff --git a/src/python/library/tritonclient/utils/shared_memory/__init__.py b/src/python/library/tritonclient/utils/shared_memory/__init__.py index e65881afb..8582db696 100755 --- a/src/python/library/tritonclient/utils/shared_memory/__init__.py +++ b/src/python/library/tritonclient/utils/shared_memory/__init__.py @@ -205,7 +205,10 @@ def get_contents_as_numpy(shm_handle, datatype, shape, offset=0): The numpy array generated using the contents of the specified shared memory region. """ - shm_file = c_void_p() + # Safe initializer for Unix case where shm_file must be dereferenced to + # base in order to store file descriptor. + safe_initializer = c_int(-1) + shm_file = cast(byref(safe_initializer), c_void_p) region_offset = c_uint64() byte_size = c_uint64() shm_addr = c_char_p() @@ -284,8 +287,10 @@ def destroy_shared_memory_region(shm_handle): SharedMemoryException If unable to unlink the shared memory region. """ - - shm_file = c_void_p() + # Safe initializer for Unix case where shm_file must be dereferenced to + # base in order to store file descriptor. + safe_initializer = c_int(-1) + shm_file = cast(byref(safe_initializer), c_void_p) offset = c_uint64() byte_size = c_uint64() shm_addr = c_char_p() diff --git a/src/python/library/tritonclient/utils/shared_memory/shared_memory.cc b/src/python/library/tritonclient/utils/shared_memory/shared_memory.cc index 267aa8931..3539a70ba 100644 --- a/src/python/library/tritonclient/utils/shared_memory/shared_memory.cc +++ b/src/python/library/tritonclient/utils/shared_memory/shared_memory.cc @@ -52,7 +52,7 @@ SharedMemoryHandleCreate( handle->triton_shm_name_ = triton_shm_name; handle->base_addr_ = shm_addr; handle->shm_key_ = shm_key; - handle->platform_handle_ = new ShmFile(shm_file); + handle->platform_handle_ = std::make_unique(shm_file); handle->offset_ = offset; handle->byte_size_ = byte_size; return static_cast(handle); @@ -97,8 +97,7 @@ SharedMemoryRegionMap( return -1; } - // close shared memory descriptor, return 0 if success else return -1 - return close(fd); + return 0; #endif } @@ -119,7 +118,7 @@ SharedMemoryRegionCreate( DWORD high_order_size = (upperbound_size >> 32) & 0xFFFFFFFF; DWORD low_order_size = upperbound_size & 0xFFFFFFFF; - HANDLE local_handle = CreateFileMapping( + HANDLE shm_file = CreateFileMapping( INVALID_HANDLE_VALUE, // use paging file NULL, // default security PAGE_READWRITE, // read/write access @@ -127,13 +126,13 @@ SharedMemoryRegionCreate( low_order_size, // maximum object size (low-order DWORD) shm_key); // name of mapping object - if (local_handle == NULL) { + if (shm_file == NULL) { return -7; } // get base address of shared memory region void* shm_addr = nullptr; - int err = SharedMemoryRegionMap((void*)local_handle, 0, byte_size, &shm_addr); + int err = SharedMemoryRegionMap((void*)shm_file, 0, byte_size, &shm_addr); if (err == -1) { return -4; } @@ -141,7 +140,7 @@ SharedMemoryRegionCreate( // create a handle for the shared memory region *shm_handle = SharedMemoryHandleCreate( std::string(triton_shm_name), shm_addr, std::string(shm_key), - (void*)local_handle, 0, byte_size); + (void*)shm_file, 0, byte_size); #else // get shared memory region descriptor int shm_fd = shm_open(shm_key, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); @@ -188,12 +187,12 @@ GetSharedMemoryHandleInfo( #ifdef _WIN32 HANDLE* file = static_cast(shm_file); #else - int* file = *static_cast(shm_file); + int* file = *reinterpret_cast(shm_file); #endif // _WIN32 SharedMemoryHandle* handle = static_cast(shm_handle); *shm_addr = static_cast(handle->base_addr_); *shm_key = handle->shm_key_.c_str(); - *file = handle->platform_handle_->shm_file_; + *file = *(handle->platform_handle_->GetShmFile()); *offset = handle->offset_; *byte_size = handle->byte_size_; return 0; @@ -213,7 +212,7 @@ SharedMemoryRegionDestroy(void* shm_handle) // We keep Windows shared memory handles open until we are done // using them. When all handles are closed, the system will free // the section of the paging file that the object uses. - CloseHandle(handle->platform_handle_->shm_file_); + CloseHandle(*(handle->platform_handle_->GetShmFile())); #else int status = munmap(shm_addr, handle->byte_size_); if (status == -1) { @@ -224,11 +223,11 @@ SharedMemoryRegionDestroy(void* shm_handle) if (shm_fd == -1) { return -5; } + close(*(handle->platform_handle_->GetShmFile())); #endif // _WIN32 - // FIXME: Investigate use of smart pointers for these - // allocations instead - delete handle->platform_handle_; + // FIXME: Investigate use of smart pointers for this + // allocation instead delete handle; return 0; diff --git a/src/python/library/tritonclient/utils/shared_memory/shared_memory.h b/src/python/library/tritonclient/utils/shared_memory/shared_memory.h index 6c1b845d7..33c2c2aea 100644 --- a/src/python/library/tritonclient/utils/shared_memory/shared_memory.h +++ b/src/python/library/tritonclient/utils/shared_memory/shared_memory.h @@ -32,7 +32,7 @@ extern "C" { #ifdef _WIN32 #define TRITONCLIENT_DECLSPEC __declspec(dllexport) #else -define TRITONCLIENT_DECLSPEC +#define TRITONCLIENT_DECLSPEC #endif //============================================================================== diff --git a/src/python/library/tritonclient/utils/shared_memory/shared_memory_handle.h b/src/python/library/tritonclient/utils/shared_memory/shared_memory_handle.h index 28dce0759..dee56697c 100644 --- a/src/python/library/tritonclient/utils/shared_memory/shared_memory_handle.h +++ b/src/python/library/tritonclient/utils/shared_memory/shared_memory_handle.h @@ -33,14 +33,20 @@ #ifdef _WIN32 #include #endif // _WIN32 +#include struct ShmFile { #ifdef _WIN32 HANDLE shm_file_; ShmFile(void* shm_file) { shm_file_ = static_cast(shm_file); }; + HANDLE* GetShmFile() { return &shm_file_; }; #else - int shm_file_; - ShmFile(int shm_file) { shm_file_ = *static_cast(shm_file); }; + std::unique_ptr shm_file_; + ShmFile(void* shm_file) + { + shm_file_ = std::make_unique(*static_cast(shm_file)); + }; + int* GetShmFile() { return shm_file_.get(); } #endif // _WIN32 }; @@ -48,7 +54,7 @@ struct SharedMemoryHandle { std::string triton_shm_name_; std::string shm_key_; void* base_addr_; - ShmFile* platform_handle_; + std::unique_ptr platform_handle_; size_t offset_; size_t byte_size_; }; From bd9bea6ab3f226bda8ccff883f7683c461e68fc1 Mon Sep 17 00:00:00 2001 From: fpetrini15 Date: Thu, 11 Apr 2024 12:49:37 -0700 Subject: [PATCH 09/10] Review comments from Guan --- .../utils/shared_memory/__init__.py | 20 +++--- .../utils/shared_memory/shared_memory.cc | 61 +++++++++---------- .../utils/shared_memory/shared_memory.h | 2 +- .../shared_memory/shared_memory_handle.h | 13 ++-- 4 files changed, 44 insertions(+), 52 deletions(-) diff --git a/src/python/library/tritonclient/utils/shared_memory/__init__.py b/src/python/library/tritonclient/utils/shared_memory/__init__.py index 8582db696..e6c21ad4a 100755 --- a/src/python/library/tritonclient/utils/shared_memory/__init__.py +++ b/src/python/library/tritonclient/utils/shared_memory/__init__.py @@ -28,6 +28,7 @@ import os import struct +import sys from ctypes import * import numpy as np @@ -45,6 +46,13 @@ def from_param(cls, value): return value.encode("utf8") +class ShmFile(Structure): + if sys.platform == "win32": + _fields_ = [("shm_handle_", c_void_p)] + else: + _fields_ = [("shm_fd_", c_int)] + + _cshm_lib = "cshm" if os.name == "nt" else "libcshm.so" _cshm_path = pkg_resources.resource_filename( "tritonclient.utils.shared_memory", _cshm_lib @@ -63,7 +71,7 @@ def from_param(cls, value): c_void_p, POINTER(c_char_p), POINTER(c_char_p), - POINTER(c_void_p), + POINTER(ShmFile), POINTER(c_uint64), POINTER(c_uint64), ] @@ -205,10 +213,7 @@ def get_contents_as_numpy(shm_handle, datatype, shape, offset=0): The numpy array generated using the contents of the specified shared memory region. """ - # Safe initializer for Unix case where shm_file must be dereferenced to - # base in order to store file descriptor. - safe_initializer = c_int(-1) - shm_file = cast(byref(safe_initializer), c_void_p) + shm_file = ShmFile() region_offset = c_uint64() byte_size = c_uint64() shm_addr = c_char_p() @@ -287,10 +292,7 @@ def destroy_shared_memory_region(shm_handle): SharedMemoryException If unable to unlink the shared memory region. """ - # Safe initializer for Unix case where shm_file must be dereferenced to - # base in order to store file descriptor. - safe_initializer = c_int(-1) - shm_file = cast(byref(safe_initializer), c_void_p) + shm_file = ShmFile() offset = c_uint64() byte_size = c_uint64() shm_addr = c_char_p() diff --git a/src/python/library/tritonclient/utils/shared_memory/shared_memory.cc b/src/python/library/tritonclient/utils/shared_memory/shared_memory.cc index 3539a70ba..da81a6d34 100644 --- a/src/python/library/tritonclient/utils/shared_memory/shared_memory.cc +++ b/src/python/library/tritonclient/utils/shared_memory/shared_memory.cc @@ -46,13 +46,13 @@ namespace { void* SharedMemoryHandleCreate( std::string triton_shm_name, void* shm_addr, std::string shm_key, - void* shm_file, size_t offset, size_t byte_size) + ShmFile* shm_file, size_t offset, size_t byte_size) { SharedMemoryHandle* handle = new SharedMemoryHandle(); handle->triton_shm_name_ = triton_shm_name; handle->base_addr_ = shm_addr; handle->shm_key_ = shm_key; - handle->platform_handle_ = std::make_unique(shm_file); + handle->platform_handle_.reset(shm_file); handle->offset_ = offset; handle->byte_size_ = byte_size; return static_cast(handle); @@ -60,10 +60,9 @@ SharedMemoryHandleCreate( int SharedMemoryRegionMap( - void* shm_file, size_t offset, size_t byte_size, void** shm_addr) + ShmFile* shm_file, size_t offset, size_t byte_size, void** shm_addr) { #ifdef _WIN32 - HANDLE file_handle = static_cast(shm_file); // The MapViewOfFile function takes a high-order and low-order DWORD (4 bytes // each) for offset. 'size_t' can either be 4 or 8 bytes depending on the // operating system. To handle both cases agnostically, we cast 'offset' to @@ -74,14 +73,14 @@ SharedMemoryRegionMap( DWORD low_order_offset = upperbound_offset & 0xFFFFFFFF; // map shared memory to process address space *shm_addr = MapViewOfFile( - file_handle, // handle to map object - FILE_MAP_ALL_ACCESS, // read/write permission - high_order_offset, // offset (high-order DWORD) - low_order_offset, // offset (low-order DWORD) + shm_file->shm_handle_, // handle to map object + FILE_MAP_ALL_ACCESS, // read/write permission + high_order_offset, // offset (high-order DWORD) + low_order_offset, // offset (low-order DWORD) byte_size); if (*shm_addr == NULL) { - CloseHandle(file_handle); + CloseHandle(shm_file->shm_handle_); return -1; } // For Windows, we cannot close the shared memory handle here. When all @@ -90,9 +89,9 @@ SharedMemoryRegionMap( // we are destroying the shared memory object. return 0; #else - int fd = *static_cast(shm_file); // map shared memory to process address space - *shm_addr = mmap(NULL, byte_size, PROT_WRITE, MAP_SHARED, fd, offset); + *shm_addr = + mmap(NULL, byte_size, PROT_WRITE, MAP_SHARED, shm_file->shm_fd_, offset); if (*shm_addr == MAP_FAILED) { return -1; } @@ -118,7 +117,7 @@ SharedMemoryRegionCreate( DWORD high_order_size = (upperbound_size >> 32) & 0xFFFFFFFF; DWORD low_order_size = upperbound_size & 0xFFFFFFFF; - HANDLE shm_file = CreateFileMapping( + HANDLE win_handle = CreateFileMapping( INVALID_HANDLE_VALUE, // use paging file NULL, // default security PAGE_READWRITE, // read/write access @@ -126,21 +125,17 @@ SharedMemoryRegionCreate( low_order_size, // maximum object size (low-order DWORD) shm_key); // name of mapping object - if (shm_file == NULL) { + if (win_handle == NULL) { return -7; } + ShmFile* shm_file = new ShmFile(win_handle); // get base address of shared memory region void* shm_addr = nullptr; - int err = SharedMemoryRegionMap((void*)shm_file, 0, byte_size, &shm_addr); + int err = SharedMemoryRegionMap(shm_file, 0, byte_size, &shm_addr); if (err == -1) { return -4; } - - // create a handle for the shared memory region - *shm_handle = SharedMemoryHandleCreate( - std::string(triton_shm_name), shm_addr, std::string(shm_key), - (void*)shm_file, 0, byte_size); #else // get shared memory region descriptor int shm_fd = shm_open(shm_key, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); @@ -154,18 +149,18 @@ SharedMemoryRegionCreate( return -3; } + ShmFile* shm_file = new ShmFile(shm_fd); // get base address of shared memory region void* shm_addr = nullptr; - int err = SharedMemoryRegionMap((void*)&shm_fd, 0, byte_size, &shm_addr); + int err = SharedMemoryRegionMap(shm_file, 0, byte_size, &shm_addr); if (err == -1) { return -4; } - +#endif // create a handle for the shared memory region *shm_handle = SharedMemoryHandleCreate( - std::string(triton_shm_name), shm_addr, std::string(shm_key), - (void*)&shm_fd, 0, byte_size); -#endif + std::string(triton_shm_name), shm_addr, std::string(shm_key), shm_file, 0, + byte_size); return 0; } @@ -181,20 +176,20 @@ SharedMemoryRegionSet( TRITONCLIENT_DECLSPEC int GetSharedMemoryHandleInfo( - void* shm_handle, char** shm_addr, const char** shm_key, void** shm_file, + void* shm_handle, char** shm_addr, const char** shm_key, void* shm_file, size_t* offset, size_t* byte_size) { -#ifdef _WIN32 - HANDLE* file = static_cast(shm_file); -#else - int* file = *reinterpret_cast(shm_file); -#endif // _WIN32 SharedMemoryHandle* handle = static_cast(shm_handle); + ShmFile* file = static_cast(shm_file); *shm_addr = static_cast(handle->base_addr_); *shm_key = handle->shm_key_.c_str(); - *file = *(handle->platform_handle_->GetShmFile()); *offset = handle->offset_; *byte_size = handle->byte_size_; +#ifdef _WIN32 + file->shm_handle_ = handle->platform_handle_->shm_handle_; +#else + file->shm_fd_ = handle->platform_handle_->shm_fd_; +#endif return 0; } @@ -212,7 +207,7 @@ SharedMemoryRegionDestroy(void* shm_handle) // We keep Windows shared memory handles open until we are done // using them. When all handles are closed, the system will free // the section of the paging file that the object uses. - CloseHandle(*(handle->platform_handle_->GetShmFile())); + CloseHandle(handle->platform_handle_->shm_handle_); #else int status = munmap(shm_addr, handle->byte_size_); if (status == -1) { @@ -223,7 +218,7 @@ SharedMemoryRegionDestroy(void* shm_handle) if (shm_fd == -1) { return -5; } - close(*(handle->platform_handle_->GetShmFile())); + close(handle->platform_handle_->shm_fd_); #endif // _WIN32 // FIXME: Investigate use of smart pointers for this diff --git a/src/python/library/tritonclient/utils/shared_memory/shared_memory.h b/src/python/library/tritonclient/utils/shared_memory/shared_memory.h index 33c2c2aea..98f0037c0 100644 --- a/src/python/library/tritonclient/utils/shared_memory/shared_memory.h +++ b/src/python/library/tritonclient/utils/shared_memory/shared_memory.h @@ -43,7 +43,7 @@ TRITONCLIENT_DECLSPEC int SharedMemoryRegionCreate( TRITONCLIENT_DECLSPEC int SharedMemoryRegionSet( void* shm_handle, size_t offset, size_t byte_size, const void* data); TRITONCLIENT_DECLSPEC int GetSharedMemoryHandleInfo( - void* shm_handle, char** shm_addr, const char** shm_key, void** shm_file, + void* shm_handle, char** shm_addr, const char** shm_key, void* shm_file, size_t* offset, size_t* byte_size); TRITONCLIENT_DECLSPEC int SharedMemoryRegionDestroy(void* shm_handle); diff --git a/src/python/library/tritonclient/utils/shared_memory/shared_memory_handle.h b/src/python/library/tritonclient/utils/shared_memory/shared_memory_handle.h index dee56697c..0bf7b71cb 100644 --- a/src/python/library/tritonclient/utils/shared_memory/shared_memory_handle.h +++ b/src/python/library/tritonclient/utils/shared_memory/shared_memory_handle.h @@ -37,16 +37,11 @@ struct ShmFile { #ifdef _WIN32 - HANDLE shm_file_; - ShmFile(void* shm_file) { shm_file_ = static_cast(shm_file); }; - HANDLE* GetShmFile() { return &shm_file_; }; + HANDLE shm_handle_; + ShmFile(HANDLE shm_handle) : shm_handle_(shm_handle){}; #else - std::unique_ptr shm_file_; - ShmFile(void* shm_file) - { - shm_file_ = std::make_unique(*static_cast(shm_file)); - }; - int* GetShmFile() { return shm_file_.get(); } + int shm_fd_; + ShmFile(int shm_fd) : shm_fd_(shm_fd){}; #endif // _WIN32 }; From f854d4a7d326c2a8f13d74727442112b3ba0639f Mon Sep 17 00:00:00 2001 From: Francesco Petrini Date: Sat, 13 Apr 2024 14:04:06 -0700 Subject: [PATCH 10/10] Introduce backing file --- .../utils/shared_memory/__init__.py | 9 +- .../utils/shared_memory/shared_memory.cc | 98 ++++++++++++++----- .../shared_memory/shared_memory_handle.h | 7 +- 3 files changed, 85 insertions(+), 29 deletions(-) diff --git a/src/python/library/tritonclient/utils/shared_memory/__init__.py b/src/python/library/tritonclient/utils/shared_memory/__init__.py index e6c21ad4a..719b96819 100755 --- a/src/python/library/tritonclient/utils/shared_memory/__init__.py +++ b/src/python/library/tritonclient/utils/shared_memory/__init__.py @@ -48,7 +48,10 @@ def from_param(cls, value): class ShmFile(Structure): if sys.platform == "win32": - _fields_ = [("shm_handle_", c_void_p)] + _fields_ = [ + ("backing_file_handle_", c_void_p), + ("shm_mapping_handle_", c_void_p), + ] else: _fields_ = [("shm_fd_", c_int)] @@ -334,7 +337,9 @@ def __init__(self, err): -4: "unable to read/mmap the shared memory region", -5: "unable to unlink the shared memory region", -6: "unable to munmap the shared memory region", - -7: "unable to create file mapping", + -7: "unable to create shm directory or backing file", + -8: "unable to create file mapping", + -9: "unable to delete backing file", } self._msg = None if type(err) == str: diff --git a/src/python/library/tritonclient/utils/shared_memory/shared_memory.cc b/src/python/library/tritonclient/utils/shared_memory/shared_memory.cc index da81a6d34..5242c007d 100644 --- a/src/python/library/tritonclient/utils/shared_memory/shared_memory.cc +++ b/src/python/library/tritonclient/utils/shared_memory/shared_memory.cc @@ -39,6 +39,8 @@ #include "shared_memory.h" #include "shared_memory_handle.h" +#define TRITON_SHM_FILE_ROOT "C:\\triton_shm\\" + //============================================================================== // SharedMemoryControlContext namespace { @@ -46,13 +48,13 @@ namespace { void* SharedMemoryHandleCreate( std::string triton_shm_name, void* shm_addr, std::string shm_key, - ShmFile* shm_file, size_t offset, size_t byte_size) + std::unique_ptr&& shm_file, size_t offset, size_t byte_size) { SharedMemoryHandle* handle = new SharedMemoryHandle(); handle->triton_shm_name_ = triton_shm_name; handle->base_addr_ = shm_addr; handle->shm_key_ = shm_key; - handle->platform_handle_.reset(shm_file); + handle->platform_handle_ = std::move(shm_file); handle->offset_ = offset; handle->byte_size_ = byte_size; return static_cast(handle); @@ -73,14 +75,14 @@ SharedMemoryRegionMap( DWORD low_order_offset = upperbound_offset & 0xFFFFFFFF; // map shared memory to process address space *shm_addr = MapViewOfFile( - shm_file->shm_handle_, // handle to map object - FILE_MAP_ALL_ACCESS, // read/write permission - high_order_offset, // offset (high-order DWORD) - low_order_offset, // offset (low-order DWORD) + shm_file->shm_mapping_handle_, // handle to map object + FILE_MAP_ALL_ACCESS, // read/write permission + high_order_offset, // offset (high-order DWORD) + low_order_offset, // offset (low-order DWORD) byte_size); if (*shm_addr == NULL) { - CloseHandle(shm_file->shm_handle_); + CloseHandle(shm_file->shm_mapping_handle_); return -1; } // For Windows, we cannot close the shared memory handle here. When all @@ -100,6 +102,38 @@ SharedMemoryRegionMap( #endif } +#ifdef _WIN32 +int +SharedMemoryCreateBackingFile(const char* shm_key, HANDLE* backing_file_handle) +{ + LPCSTR backing_file_directory(TRITON_SHM_FILE_ROOT); + bool success = CreateDirectory(backing_file_directory, NULL); + if (!success && GetLastError() != ERROR_ALREADY_EXISTS) { + return -1; + } + LPCSTR backing_file_path = + std::string(TRITON_SHM_FILE_ROOT + std::string(shm_key)).c_str(); + *backing_file_handle = CreateFile( + backing_file_path, GENERIC_READ | GENERIC_WRITE, FILE_SHARE_READ, NULL, + OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL); + if (*backing_file_handle == INVALID_HANDLE_VALUE) { + return -1; + } + return 0; +} + +int +SharedMemoryDeleteBackingFile(const char* key, HANDLE backing_file_handle) +{ + CloseHandle(backing_file_handle); + LPCSTR backing_file_path = + std::string(TRITON_SHM_FILE_ROOT + std::string(key)).c_str(); + if (!DeleteFile(backing_file_path)) { + return -1; + } +} +#endif + } // namespace TRITONCLIENT_DECLSPEC int @@ -108,6 +142,11 @@ SharedMemoryRegionCreate( void** shm_handle) { #ifdef _WIN32 + HANDLE backing_file_handle; + int err = SharedMemoryCreateBackingFile(shm_key, &backing_file_handle); + if (err == -1) { + return -7; + } // The CreateFileMapping function takes a high-order and low-order DWORD (4 // bytes each) for size. 'size_t' can either be 4 or 8 bytes depending on the // operating system. To handle both cases agnostically, we cast 'byte_size' to @@ -118,22 +157,28 @@ SharedMemoryRegionCreate( DWORD low_order_size = upperbound_size & 0xFFFFFFFF; HANDLE win_handle = CreateFileMapping( - INVALID_HANDLE_VALUE, // use paging file - NULL, // default security - PAGE_READWRITE, // read/write access - high_order_size, // maximum object size (high-order DWORD) - low_order_size, // maximum object size (low-order DWORD) - shm_key); // name of mapping object + backing_file_handle, // use backing file + NULL, // default security + PAGE_READWRITE, // read/write access + high_order_size, // maximum object size (high-order DWORD) + low_order_size, // maximum object size (low-order DWORD) + shm_key); // name of mapping object if (win_handle == NULL) { - return -7; + LPCSTR backing_file_path = + std::string(TRITON_SHM_FILE_ROOT + std::string(shm_key)).c_str(); + // Cleanup backing file on failure + SharedMemoryDeleteBackingFile(shm_key, backing_file_handle); + return -8; } - ShmFile* shm_file = new ShmFile(win_handle); + std::unique_ptr shm_file = + std::make_unique(backing_file_handle, win_handle); // get base address of shared memory region void* shm_addr = nullptr; - int err = SharedMemoryRegionMap(shm_file, 0, byte_size, &shm_addr); + err = SharedMemoryRegionMap(shm_file.get(), 0, byte_size, &shm_addr); if (err == -1) { + SharedMemoryDeleteBackingFile(shm_key, backing_file_handle); return -4; } #else @@ -149,18 +194,18 @@ SharedMemoryRegionCreate( return -3; } - ShmFile* shm_file = new ShmFile(shm_fd); + std::unique_ptr shm_file = std::make_unique(shm_fd); // get base address of shared memory region void* shm_addr = nullptr; - int err = SharedMemoryRegionMap(shm_file, 0, byte_size, &shm_addr); + int err = SharedMemoryRegionMap(shm_file.get(), 0, byte_size, &shm_addr); if (err == -1) { return -4; } #endif // create a handle for the shared memory region *shm_handle = SharedMemoryHandleCreate( - std::string(triton_shm_name), shm_addr, std::string(shm_key), shm_file, 0, - byte_size); + std::string(triton_shm_name), shm_addr, std::string(shm_key), + std::move(shm_file), 0, byte_size); return 0; } @@ -186,7 +231,8 @@ GetSharedMemoryHandleInfo( *offset = handle->offset_; *byte_size = handle->byte_size_; #ifdef _WIN32 - file->shm_handle_ = handle->platform_handle_->shm_handle_; + file->backing_file_handle_ = handle->platform_handle_->shm_mapping_handle_; + file->shm_mapping_handle_ = handle->platform_handle_->shm_mapping_handle_; #else file->shm_fd_ = handle->platform_handle_->shm_fd_; #endif @@ -204,10 +250,12 @@ SharedMemoryRegionDestroy(void* shm_handle) if (!success) { return -6; } - // We keep Windows shared memory handles open until we are done - // using them. When all handles are closed, the system will free - // the section of the paging file that the object uses. - CloseHandle(handle->platform_handle_->shm_handle_); + CloseHandle(handle->platform_handle_->shm_mapping_handle_); + int err = SharedMemoryDeleteBackingFile( + handle->shm_key_.c_str(), handle->platform_handle_->backing_file_handle_); + if (err == -1) { + return -9; + } #else int status = munmap(shm_addr, handle->byte_size_); if (status == -1) { diff --git a/src/python/library/tritonclient/utils/shared_memory/shared_memory_handle.h b/src/python/library/tritonclient/utils/shared_memory/shared_memory_handle.h index 0bf7b71cb..bd264546a 100644 --- a/src/python/library/tritonclient/utils/shared_memory/shared_memory_handle.h +++ b/src/python/library/tritonclient/utils/shared_memory/shared_memory_handle.h @@ -37,8 +37,11 @@ struct ShmFile { #ifdef _WIN32 - HANDLE shm_handle_; - ShmFile(HANDLE shm_handle) : shm_handle_(shm_handle){}; + HANDLE backing_file_handle_; + HANDLE shm_mapping_handle_; + ShmFile(HANDLE backing_file_handle, HANDLE shm_mapping_handle) + : backing_file_handle_(backing_file_handle), + shm_mapping_handle_(shm_mapping_handle){}; #else int shm_fd_; ShmFile(int shm_fd) : shm_fd_(shm_fd){};