
Commit db4f835

Merge pull request #524 from greenbrettmichael/bg/implement-optional-half-precision
Make FP precision configurable
2 parents: 6bf1525 + 7a9d197

4 files changed (+45 −1)


CMakeLists.txt

Lines changed: 16 additions & 0 deletions
```diff
@@ -201,6 +201,22 @@ list(GET CMAKE_CUDA_ARCHITECTURES 0 MIN_GPU_ARCH)
 
 string(REPLACE "-virtual" "" MIN_GPU_ARCH "${MIN_GPU_ARCH}")
 
+if (MIN_GPU_ARCH EQUAL 61 OR MIN_GPU_ARCH LESS_EQUAL 52)
+	set(TCNN_HALF_PRECISION_DEFAULT OFF)
+else()
+	set(TCNN_HALF_PRECISION_DEFAULT ON)
+endif()
+
+option(TCNN_HALF_PRECISION "Enable half precision (FP16) arithmetic" ${TCNN_HALF_PRECISION_DEFAULT})
+
+if (TCNN_HALF_PRECISION)
+	list(APPEND TCNN_DEFINITIONS -DTCNN_HALF_PRECISION=1)
+	message(STATUS "TCNN_HALF_PRECISION: ON")
+else()
+	list(APPEND TCNN_DEFINITIONS -DTCNN_HALF_PRECISION=0)
+	message(STATUS "TCNN_HALF_PRECISION: OFF")
+endif()
+
 message(STATUS "Targeting CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")
 if (TCNN_HAS_PARENT)
 	set(TCNN_CUDA_ARCHITECTURES ${CMAKE_CUDA_ARCHITECTURES} PARENT_SCOPE)
```
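The new `option()` means the architecture-based default can be overridden at configure time rather than by editing the file. The `EQUAL 61 OR LESS_EQUAL 52` test encodes FP16 hardware quality: compute capability 6.1 (GTX 10-series Pascal) runs FP16 math at a small fraction of FP32 throughput, and desktop Maxwell and older (≤5.2) lack fast FP16 arithmetic, while 6.0 (P100) and 5.3 have native FP16 and keep the ON default. A hedged usage sketch (the build directory name is arbitrary):

```sh
# Force FP16 off regardless of the detected architecture.
cmake . -B build -DTCNN_HALF_PRECISION=OFF
cmake --build build --config RelWithDebInfo
```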

README.md

Lines changed: 14 additions & 0 deletions
````diff
@@ -220,6 +220,20 @@ tiny-cuda-nn$ cd bindings/torch
 tiny-cuda-nn/bindings/torch$ python setup.py install
 ```
 
+By default, the extension automatically enables half precision (FP16) on GPUs with good support (Volta, Turing, Ampere, etc.) and disables it on older architectures or those with slow FP16 (e.g., Pascal/GTX 10-series).
+
+If you wish to override this behavior (e.g., to force FP16 on unsupported hardware or disable it for debugging), set the `TCNN_HALF_PRECISION` environment variable before installation:
+
+- Disable FP16: `0`
+- Enable FP16: `1`
+
+Example:
+```sh
+# Linux / macOS (Disable FP16)
+export TCNN_HALF_PRECISION=0
+pip install git+https://github.com/NVlabs/tiny-cuda-nn/#subdirectory=bindings/torch
+```
+
 Upon success, you can use __tiny-cuda-nn__ models as in the following example:
 ```py
 import commentjson as json
````
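The README example shows only the disable path; enabling is symmetric, and because setup.py reads the variable at build time it can also be passed inline for a single command. A small sketch assuming the same install target as above:

```sh
# Linux / macOS (force-enable FP16 for this install only)
TCNN_HALF_PRECISION=1 pip install git+https://github.com/NVlabs/tiny-cuda-nn/#subdirectory=bindings/torch
```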

bindings/torch/setup.py

Lines changed: 12 additions & 0 deletions
```diff
@@ -146,6 +146,18 @@ def find_cl_path():
 	"-DTCNN_RTC_USE_FAST_MATH",
 ]
 
+if "TCNN_HALF_PRECISION" in os.environ:
+	enable_half = os.environ["TCNN_HALF_PRECISION"].lower() in ["1", "true", "on", "yes"]
+	base_definitions.append(f"-DTCNN_HALF_PRECISION={int(enable_half)}")
+	print(f"Forcing TCNN_HALF_PRECISION to {'ON' if enable_half else 'OFF'}")
+else:
+	if min_compute_capability == 61 or min_compute_capability <= 52:
+		enable_half = False
+	else:
+		enable_half = True
+	print(f"Auto-detecting TCNN_HALF_PRECISION: {'ON' if enable_half else 'OFF'} (Arch: {min_compute_capability})")
+	base_definitions.append(f"-DTCNN_HALF_PRECISION={int(enable_half)}")
+
 base_source_files = [
 	"tinycudann/bindings.cpp",
 	"../../dependencies/fmt/src/format.cc",
```

include/tiny-cuda-nn/common.h

Lines changed: 3 additions & 1 deletion
```diff
@@ -101,7 +101,9 @@ static constexpr bool PARAMS_ALIGNED = false;
 static constexpr bool PARAMS_ALIGNED = true;
 #endif
 
-#define TCNN_HALF_PRECISION (!(TCNN_MIN_GPU_ARCH == 61 || TCNN_MIN_GPU_ARCH <= 52))
+#ifndef TCNN_HALF_PRECISION
+#error "TCNN_HALF_PRECISION is undefined. The build system must define this explicitly."
+#endif
 
 // TCNN has the following behavior depending on GPU arch.
 // Refer to the first row of the table at the following URL for information about
```
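The `#error` guard matters because the preprocessor evaluates an undefined identifier in `#if` as 0: without it, a build system that forgot to pass the flag would silently produce an FP32 build instead of failing loudly. A minimal sketch of the kind of consumer this protects; the `network_precision_t` alias follows tiny-cuda-nn's convention, but treat the block as illustrative rather than a verbatim excerpt of the header:

```cpp
#include <cuda_fp16.h>  // __half

// TCNN_HALF_PRECISION must arrive from the build system as 0 or 1
// (e.g., -DTCNN_HALF_PRECISION=1); the header's #error enforces this.
#if TCNN_HALF_PRECISION
using network_precision_t = __half;  // FP16 weights and activations
#else
using network_precision_t = float;   // FP32 fallback
#endif
```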
