@@ -218,6 +218,30 @@ std::string pixel_importer_cpu::import_alpha(md_view<float, 3> dst,
218
218
return " " ;
219
219
}
220
220
221
+
222
+ template <typename T>
223
+ std::string pixel_exporter_cpu::fetch_alpha (md_view<const float , 3 > src, cudaStream_t stream) {
224
+ auto [c, h, w] = src.shape ;
225
+
226
+ if (h * w > max_size) {
227
+ return " dimension too big" ;
228
+ }
229
+
230
+ auto err = cudaMemcpyAsync (buffer_alpha.get (), src.data , h * w * 4 , cudaMemcpyDeviceToHost, stream);
231
+ if (err != cudaSuccess) {
232
+ return std::string (" CUDA error: " ) + cudaGetErrorName (err);
233
+ }
234
+
235
+ err = cudaStreamSynchronize (stream);
236
+ if (err != cudaSuccess) {
237
+ return std::string (" CUDA error: " ) + cudaGetErrorName (err);
238
+ }
239
+
240
+ current_buffer_shape = src.shape ;
241
+ alpha_filled = true ;
242
+ return " " ;
243
+ }
244
+
221
245
template <std::unsigned_integral U>
222
246
std::string pixel_exporter_cpu::fetch_color (md_view<const float , 3 > src,
223
247
md_uview<U, 3 > dst,
@@ -320,29 +344,6 @@ std::string pixel_exporter_cpu::fetch_color(md_view<const float, 3> src,
320
344
return " " ;
321
345
}
322
346
323
- template <typename T>
324
- std::string pixel_exporter_cpu::fetch_alpha (md_view<const float , 3 > src, cudaStream_t stream) {
325
- auto [c, h, w] = src.shape ;
326
-
327
- if (h * w > max_size) {
328
- return " dimension too big" ;
329
- }
330
-
331
- auto err = cudaMemcpyAsync (buffer_alpha.get (), src.data , h * w * 4 , cudaMemcpyDeviceToHost, stream);
332
- if (err != cudaSuccess) {
333
- return std::string (" CUDA error: " ) + cudaGetErrorName (err);
334
- }
335
-
336
- err = cudaStreamSynchronize (stream);
337
- if (err != cudaSuccess) {
338
- return std::string (" CUDA error: " ) + cudaGetErrorName (err);
339
- }
340
-
341
- current_buffer_shape = src.shape ;
342
- alpha_filled = true ;
343
- return " " ;
344
- }
345
-
346
347
// -----------------------------------------------------------------------------
347
348
// GPU part
348
349
@@ -523,7 +524,7 @@ std::string pixel_exporter_gpu<F, eSize>::fetch_alpha(md_view<const F, 3> src, c
523
524
return " dimension too big" ;
524
525
}
525
526
526
- auto err = cudaMemcpyAsync (gpu_buffer_alpha, src.data , h * w, cudaMemcpyDeviceToDevice, stream);
527
+ auto err = cudaMemcpyAsync (gpu_buffer_alpha, src.data , h * w * sizeof (F) , cudaMemcpyDeviceToDevice, stream);
527
528
if (err != cudaSuccess) {
528
529
return std::string (" CUDA error: " ) + cudaGetErrorName (err);
529
530
}
0 commit comments