Skip to content

Commit 8687970

Browse files
Fix alpha handling of GPU reformatter
1 parent 5bbefa9 commit 8687970

File tree

2 files changed (+27, -31 lines changed)

reformat/reformat.cu

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -127,13 +127,8 @@ void import_pixel_cuda(md_view<F, 3> dst,
127127
import_opaque_kernel<<<dimGrid, dimBlock, 0, stream>>>(dst, src, F(a), F(b));
128128
}
129129
else {
130-
if (c == 4) {
131-
import_alpha_kernel<<<dimGrid, dimBlock, 0, stream>>>(dst, dst_alpha, src, F(a), F(b));
132-
}
133-
else {
134-
import_opaque_kernel<<<dimGrid, dimBlock, 0, stream>>>(dst, src, F(a), F(b));
135-
cudaMemsetAsync(dst_alpha.data, 0, dst_alpha.size() * sizeof(F), stream);
136-
}
130+
assert(src.shape[2] == 4);
131+
import_alpha_kernel<<<dimGrid, dimBlock, 0, stream>>>(dst, dst_alpha, src, F(a), F(b));
137132
}
138133
}
139134

reformat/reformat.h

Lines changed: 25 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,30 @@ std::string pixel_importer_cpu::import_alpha(md_view<float, 3> dst,
218218
return "";
219219
}
220220

221+
222+
template<typename T>
223+
std::string pixel_exporter_cpu::fetch_alpha(md_view<const float, 3> src, cudaStream_t stream) {
224+
auto [c, h, w] = src.shape;
225+
226+
if (h * w > max_size) {
227+
return "dimension too big";
228+
}
229+
230+
auto err = cudaMemcpyAsync(buffer_alpha.get(), src.data, h * w * 4, cudaMemcpyDeviceToHost, stream);
231+
if (err != cudaSuccess) {
232+
return std::string("CUDA error: ") + cudaGetErrorName(err);
233+
}
234+
235+
err = cudaStreamSynchronize(stream);
236+
if (err != cudaSuccess) {
237+
return std::string("CUDA error: ") + cudaGetErrorName(err);
238+
}
239+
240+
current_buffer_shape = src.shape;
241+
alpha_filled = true;
242+
return "";
243+
}
244+
221245
template<std::unsigned_integral U>
222246
std::string pixel_exporter_cpu::fetch_color(md_view<const float, 3> src,
223247
md_uview<U, 3> dst,
@@ -320,29 +344,6 @@ std::string pixel_exporter_cpu::fetch_color(md_view<const float, 3> src,
320344
return "";
321345
}
322346

323-
template<typename T>
324-
std::string pixel_exporter_cpu::fetch_alpha(md_view<const float, 3> src, cudaStream_t stream) {
325-
auto [c, h, w] = src.shape;
326-
327-
if (h * w > max_size) {
328-
return "dimension too big";
329-
}
330-
331-
auto err = cudaMemcpyAsync(buffer_alpha.get(), src.data, h * w * 4, cudaMemcpyDeviceToHost, stream);
332-
if (err != cudaSuccess) {
333-
return std::string("CUDA error: ") + cudaGetErrorName(err);
334-
}
335-
336-
err = cudaStreamSynchronize(stream);
337-
if (err != cudaSuccess) {
338-
return std::string("CUDA error: ") + cudaGetErrorName(err);
339-
}
340-
341-
current_buffer_shape = src.shape;
342-
alpha_filled = true;
343-
return "";
344-
}
345-
346347
// -----------------------------------------------------------------------------
347348
// GPU part
348349

@@ -523,7 +524,7 @@ std::string pixel_exporter_gpu<F, eSize>::fetch_alpha(md_view<const F, 3> src, c
523524
return "dimension too big";
524525
}
525526

526-
auto err = cudaMemcpyAsync(gpu_buffer_alpha, src.data, h * w, cudaMemcpyDeviceToDevice, stream);
527+
auto err = cudaMemcpyAsync(gpu_buffer_alpha, src.data, h * w * sizeof(F), cudaMemcpyDeviceToDevice, stream);
527528
if (err != cudaSuccess) {
528529
return std::string("CUDA error: ") + cudaGetErrorName(err);
529530
}

0 commit comments

Comments
 (0)