ggml-org · yeahdongcn · Aug 27, 2025
@@ -81,7 +81,7 @@ static __global__ void mmq_ids_helper(
 #pragma unroll
             for (int offset = neu_padded; offset < warp_size; offset += neu_padded) {
                 const int tmp = __shfl_up_sync(0xFFFFFFFF, it_compact_add_self, offset, warp_size);
-                if (threadIdx.x >= offset) {
+                if (threadIdx.x >= static_cast<unsigned int>(offset)) {
                     it_compact_add_lower += tmp;
                 }
             }
@@ -110,7 +110,7 @@ static __global__ void mmq_ids_helper(
 
     expert_bounds[expert] = nex_prev;
 
-    if (expert < gridDim.x - 1) {
+    if (expert < static_cast<int>(gridDim.x) - 1) {
         return;
     }
 

diff --git a/ggml/src/ggml-cuda/pad_reflect_1d.cu b/ggml/src/ggml-cuda/pad_reflect_1d.cu
@@ -47,6 +47,8 @@ static __global__ void pad_reflect_1d_kernel_f32(
 
         *(float *)(dst_ptr + i0 * nb0) = value;
     }
+
+    GGML_UNUSED(ne00);
 }
 
 void ggml_cuda_op_pad_reflect_1d(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
@@ -47,6 +47,8 @@ static __global__ void pad_reflect_1d_kernel_f32(
             *(float *)(dst_ptr + i0 * nb0) = value;
         }
+        GGML_UNUSED(ne00);
     }
     void ggml_cuda_op_pad_reflect_1d(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
@@ Expand Down @@