@@ -328,6 +328,7 @@ struct vk_device_struct {
328
328
uint64_t max_memory_allocation_size;
329
329
uint64_t suballocation_block_size;
330
330
bool fp16;
331
+ bool bf16;
331
332
bool pipeline_robustness;
332
333
vk::Device device;
333
334
uint32_t vendor_id;
@@ -3273,6 +3274,12 @@ static vk_device ggml_vk_get_device(size_t idx) {
3273
3274
3274
3275
device->fp16 = device->fp16 && vk12_features.shaderFloat16;
3275
3276
3277
+ #if defined(VK_KHR_shader_bfloat16)
3278
+ device->bf16 = bfloat16_support && bfloat16_features.shaderBFloat16Type;
3279
+ #else
3280
+ device->bf16 = false;
3281
+ #endif
3282
+
3276
3283
device->pipeline_robustness = pl_robustness_features.pipelineRobustness;
3277
3284
3278
3285
if (device->subgroup_size_control) {
@@ -3615,6 +3622,7 @@ static void ggml_vk_print_gpu_info(size_t idx) {
3615
3622
bool coopmat_support = false;
3616
3623
bool coopmat2_support = false;
3617
3624
bool integer_dot_product = false;
3625
+ bool bfloat16_support = false;
3618
3626
3619
3627
for (auto properties : ext_props) {
3620
3628
if (strcmp("VK_KHR_16bit_storage", properties.extensionName) == 0) {
@@ -3635,6 +3643,11 @@ static void ggml_vk_print_gpu_info(size_t idx) {
3635
3643
} else if (strcmp("VK_KHR_shader_integer_dot_product", properties.extensionName) == 0 &&
3636
3644
!getenv("GGML_VK_DISABLE_INTEGER_DOT_PRODUCT")) {
3637
3645
integer_dot_product = true;
3646
+ #endif
3647
+ #if defined(GGML_VULKAN_BFLOAT16_GLSLC_SUPPORT)
3648
+ } else if (strcmp("VK_KHR_shader_bfloat16", properties.extensionName) == 0 &&
3649
+ !getenv("GGML_VK_DISABLE_BFLOAT16")) {
3650
+ bfloat16_support = true;
3638
3651
#endif
3639
3652
}
3640
3653
}
@@ -3701,10 +3714,25 @@ static void ggml_vk_print_gpu_info(size_t idx) {
3701
3714
last_struct = (VkBaseOutStructure *)&shader_integer_dot_product_features;
3702
3715
}
3703
3716
3717
+ #if defined(VK_KHR_shader_bfloat16)
3718
+ VkPhysicalDeviceShaderBfloat16FeaturesKHR bfloat16_features {};
3719
+ bfloat16_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_BFLOAT16_FEATURES_KHR;
3720
+ if (bfloat16_support) {
3721
+ last_struct->pNext = (VkBaseOutStructure *)&bfloat16_features;
3722
+ last_struct = (VkBaseOutStructure *)&bfloat16_features;
3723
+ }
3724
+ #endif
3725
+
3704
3726
vkGetPhysicalDeviceFeatures2(physical_device, &device_features2);
3705
3727
3706
3728
fp16 = fp16 && vk12_features.shaderFloat16;
3707
3729
3730
+ #if defined(VK_KHR_shader_bfloat16)
3731
+ bool bf16 = bfloat16_support && bfloat16_features.shaderBFloat16Type;
3732
+ #else
3733
+ bool bf16 = false;
3734
+ #endif
3735
+
3708
3736
uint32_t default_subgroup_size = get_subgroup_size("", device_architecture);
3709
3737
const size_t subgroup_size = (default_subgroup_size != 0) ? default_subgroup_size : subgroup_props.subgroupSize;
3710
3738
const bool uma = props2.properties.deviceType == vk::PhysicalDeviceType::eIntegratedGpu;
@@ -3722,8 +3750,8 @@ static void ggml_vk_print_gpu_info(size_t idx) {
3722
3750
std::string matrix_cores = coopmat2_support ? "NV_coopmat2" : coopmat_support ? "KHR_coopmat" : "none";
3723
3751
3724
3752
std::string device_name = props2.properties.deviceName.data();
3725
- GGML_LOG_DEBUG("ggml_vulkan: %zu = %s (%s) | uma: %d | fp16: %d | warp size: %zu | shared memory: %d | int dot: %d | matrix cores: %s\n",
3726
- idx, device_name.c_str(), driver_props.driverName.data(), uma, fp16, subgroup_size,
3753
+ GGML_LOG_DEBUG("ggml_vulkan: %zu = %s (%s) | uma: %d | fp16: %d | bf16: %d | warp size: %zu | shared memory: %d | int dot: %d | matrix cores: %s\n",
3754
+ idx, device_name.c_str(), driver_props.driverName.data(), uma, fp16, bf16, subgroup_size,
3727
3755
props2.properties.limits.maxComputeSharedMemorySize, integer_dot_product, matrix_cores.c_str());
3728
3756
3729
3757
if (props2.properties.deviceType == vk::PhysicalDeviceType::eCpu) {
0 commit comments