
Commit db4f835

Merge pull request #524 from greenbrettmichael/bg/implement-optional-half-precision
Make FP precision configurable
2 parents: 6bf1525 + 7a9d197

4 files changed (+45 −1)


CMakeLists.txt

Lines changed: 16 additions & 0 deletions
```diff
@@ -201,6 +201,22 @@ list(GET CMAKE_CUDA_ARCHITECTURES 0 MIN_GPU_ARCH)
 
 string(REPLACE "-virtual" "" MIN_GPU_ARCH "${MIN_GPU_ARCH}")
 
+if (MIN_GPU_ARCH EQUAL 61 OR MIN_GPU_ARCH LESS_EQUAL 52)
+	set(TCNN_HALF_PRECISION_DEFAULT OFF)
+else()
+	set(TCNN_HALF_PRECISION_DEFAULT ON)
+endif()
+
+option(TCNN_HALF_PRECISION "Enable half precision (FP16) arithmetic" ${TCNN_HALF_PRECISION_DEFAULT})
+
+if (TCNN_HALF_PRECISION)
+	list(APPEND TCNN_DEFINITIONS -DTCNN_HALF_PRECISION=1)
+	message(STATUS "TCNN_HALF_PRECISION: ON")
+else()
+	list(APPEND TCNN_DEFINITIONS -DTCNN_HALF_PRECISION=0)
+	message(STATUS "TCNN_HALF_PRECISION: OFF")
+endif()
+
 message(STATUS "Targeting CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")
 if (TCNN_HAS_PARENT)
 	set(TCNN_CUDA_ARCHITECTURES ${CMAKE_CUDA_ARCHITECTURES} PARENT_SCOPE)
```
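The new `option()` means the architecture-based default can be overridden at configure time rather than by editing the file. The `EQUAL 61 OR LESS_EQUAL 52` test encodes FP16 hardware quality: compute capability 6.1 (GTX 10-series Pascal) runs FP16 math at a small fraction of FP32 throughput, and desktop Maxwell and older (≤5.2) lack fast FP16 arithmetic, while 6.0 (P100) and 5.3 have native FP16 and keep the ON default. A hedged usage sketch (the build directory name is arbitrary):

```sh
# Force FP16 off regardless of the detected architecture.
cmake . -B build -DTCNN_HALF_PRECISION=OFF
cmake --build build --config RelWithDebInfo
```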

README.md

Lines changed: 14 additions & 0 deletions
````diff
@@ -220,6 +220,20 @@ tiny-cuda-nn$ cd bindings/torch
 tiny-cuda-nn/bindings/torch$ python setup.py install
 ```
 
+By default, the extension automatically enables half precision (FP16) on GPUs with good support (Volta, Turing, Ampere, etc.) and disables it on older architectures or those with slow FP16 (e.g., Pascal/GTX 10-series).
+
+If you wish to override this behavior (e.g., to force FP16 on unsupported hardware or disable it for debugging), set the `TCNN_HALF_PRECISION` environment variable before installation:
+
+- Disable FP16: `0`
+- Enable FP16: `1`
+
+Example:
+```sh
+# Linux / macOS (Disable FP16)
+export TCNN_HALF_PRECISION=0
+pip install git+https://github.com/NVlabs/tiny-cuda-nn/#subdirectory=bindings/torch
+```
+
 Upon success, you can use __tiny-cuda-nn__ models as in the following example:
 ```py
 import commentjson as json
````
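The README example shows only the disable path; enabling is symmetric, and because setup.py reads the variable at build time it can also be passed inline for a single command. A small sketch assuming the same install target as above:

```sh
# Linux / macOS (force-enable FP16 for this install only)
TCNN_HALF_PRECISION=1 pip install git+https://github.com/NVlabs/tiny-cuda-nn/#subdirectory=bindings/torch
```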

bindings/torch/setup.py

Lines changed: 12 additions & 0 deletions
```diff
@@ -146,6 +146,18 @@ def find_cl_path():
 	"-DTCNN_RTC_USE_FAST_MATH",
 ]
 
+if "TCNN_HALF_PRECISION" in os.environ:
+	enable_half = os.environ["TCNN_HALF_PRECISION"].lower() in ["1", "true", "on", "yes"]
+	base_definitions.append(f"-DTCNN_HALF_PRECISION={int(enable_half)}")
+	print(f"Forcing TCNN_HALF_PRECISION to {'ON' if enable_half else 'OFF'}")
+else:
+	if min_compute_capability == 61 or min_compute_capability <= 52:
+		enable_half = False
+	else:
+		enable_half = True
+	print(f"Auto-detecting TCNN_HALF_PRECISION: {'ON' if enable_half else 'OFF'} (Arch: {min_compute_capability})")
+	base_definitions.append(f"-DTCNN_HALF_PRECISION={int(enable_half)}")
+
 base_source_files = [
 	"tinycudann/bindings.cpp",
 	"../../dependencies/fmt/src/format.cc",
```

include/tiny-cuda-nn/common.h

Lines changed: 3 additions & 1 deletion
```diff
@@ -101,7 +101,9 @@ static constexpr bool PARAMS_ALIGNED = false;
 static constexpr bool PARAMS_ALIGNED = true;
 #endif
 
-#define TCNN_HALF_PRECISION (!(TCNN_MIN_GPU_ARCH == 61 || TCNN_MIN_GPU_ARCH <= 52))
+#ifndef TCNN_HALF_PRECISION
+#error "TCNN_HALF_PRECISION is undefined. The build system must define this explicitly."
+#endif
 
 // TCNN has the following behavior depending on GPU arch.
 // Refer to the first row of the table at the following URL for information about
```
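The `#error` guard matters because the preprocessor evaluates an undefined identifier in `#if` as 0: without it, a build system that forgot to pass the flag would silently produce an FP32 build instead of failing loudly. A minimal sketch of the kind of consumer this protects; the `network_precision_t` alias follows tiny-cuda-nn's convention, but treat the block as illustrative rather than a verbatim excerpt of the header:

```cpp
#include <cuda_fp16.h>  // __half

// TCNN_HALF_PRECISION must arrive from the build system as 0 or 1
// (e.g., -DTCNN_HALF_PRECISION=1); the header's #error enforces this.
#if TCNN_HALF_PRECISION
using network_precision_t = __half;  // FP16 weights and activations
#else
using network_precision_t = float;   // FP32 fallback
#endif
```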
