support noisy preview via API

stduhpf · stduhpf · commit 4e3500c99e8e · 2025-10-25T20:20:01.000+02:00
diff --git a/examples/cli/main.cpp b/examples/cli/main.cpp
@@ -1506,7 +1506,10 @@ bool load_images_from_dir(const std::string dir,
 const char* preview_path;
 float preview_fps;
 
-void step_callback(int step, int frame_count, sd_image_t* image) {
+void step_callback(int step, int frame_count, sd_image_t* image, bool is_noisy) {
+    (void)is_noisy;
+    // is_noisy is true when the preview shows noisy latents and false when it shows denoised latents.
+    // It is unused in this app, where previews are either always noisy or always denoised.
     if (frame_count == 1) {
         stbi_write_png(preview_path, image->width, image->height, image->channel, image->data, 0);
     } else {
@@ -1541,7 +1544,7 @@ int main(int argc, const char* argv[]) {
     params.high_noise_sample_params.guidance.slg.layer_count = params.high_noise_skip_layers.size();
 
     sd_set_log_callback(sd_log_cb, (void*)&params);
-    sd_set_preview_callback((sd_preview_cb_t)step_callback, params.preview_method, params.preview_interval);
+    sd_set_preview_callback((sd_preview_cb_t)step_callback, params.preview_method, params.preview_interval, true, false);
 
     if (params.verbose) {
         print_params(params);
diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp
@@ -1148,7 +1148,8 @@ class StableDiffusionGGML {
                        enum SDVersion version,
                        preview_t preview_mode,
                        ggml_tensor* result,
-                       std::function<void(int, int, sd_image_t*)> step_callback) {
+                       std::function<void(int, int, sd_image_t*, bool)> step_callback,
+                       bool is_noisy) {
         const uint32_t channel = 3;
         uint32_t width         = latents->ne[0];
         uint32_t height        = latents->ne[1];
@@ -1218,7 +1219,7 @@ class StableDiffusionGGML {
             for (int i = 0; i < frames; i++) {
                 images[i] = {width, height, channel, data + i * width * height * channel};
             }
-            step_callback(step, frames, images);
+            step_callback(step, frames, images, is_noisy);
             free(data);
             free(images);
         } else {
@@ -1272,7 +1273,7 @@ class StableDiffusionGGML {
                 images[i].data    = sd_tensor_to_image(result, i, ggml_n_dims(latents) == 4);
             }
 
-            step_callback(step, frames, images);
+            step_callback(step, frames, images, is_noisy);
             
             ggml_tensor_scale(result, 0);
             for (int i = 0; i < frames; i++) {
@@ -1384,6 +1385,8 @@ class StableDiffusionGGML {
         }
 
         auto denoise = [&](ggml_tensor* input, float sigma, int step) -> ggml_tensor* {
+            auto sd_preview_cb   = sd_get_preview_callback();
+            auto sd_preview_mode = sd_get_preview_mode();
             if (step == 1 || step == -1) {
                 pretty_progress(0, (int)steps, 0);
             }
@@ -1418,6 +1421,11 @@ class StableDiffusionGGML {
             if (denoise_mask != nullptr && version == VERSION_WAN2_2_TI2V) {
                 apply_mask(noised_input, init_latent, denoise_mask);
             }
+            if (sd_preview_cb != NULL && sd_should_preview_noisy()) {
+                if (step % sd_get_preview_interval() == 0) {
+                    preview_image(work_ctx, step, noised_input, version, sd_preview_mode, preview_tensor, sd_preview_cb, true);
+                }
+            }
 
             std::vector<struct ggml_tensor*> controls;
 
@@ -1542,14 +1550,13 @@ class StableDiffusionGGML {
             if (denoise_mask != nullptr) {
                 apply_mask(denoised, init_latent, denoise_mask);
             }
-            auto sd_preview_cb   = sd_get_preview_callback();
-            auto sd_preview_mode = sd_get_preview_mode();
-            if (sd_preview_cb != NULL) {
+
+            if (sd_preview_cb != NULL && sd_should_preview_denoised()) {
                 if (step % sd_get_preview_interval() == 0) {
-                    preview_image(work_ctx, step, denoised, version, sd_preview_mode, preview_tensor, sd_preview_cb);
+                    preview_image(work_ctx, step, denoised, version, sd_preview_mode, preview_tensor, sd_preview_cb, false);
                 }
             }
-            
+
             int64_t t1 = ggml_time_us();
             if (step > 0 || step == -(int)steps) {
                 int showstep = std::abs(step);
diff --git a/stable-diffusion.h b/stable-diffusion.h
@@ -263,11 +263,11 @@ typedef struct sd_ctx_t sd_ctx_t;
 
 typedef void (*sd_log_cb_t)(enum sd_log_level_t level, const char* text, void* data);
 typedef void (*sd_progress_cb_t)(int step, int steps, float time, void* data);
-typedef void (*sd_preview_cb_t)(int, int, sd_image_t*);
+typedef void (*sd_preview_cb_t)(int step, int frame_count, sd_image_t* frames, bool is_noisy);
 
 SD_API void sd_set_log_callback(sd_log_cb_t sd_log_cb, void* data);
 SD_API void sd_set_progress_callback(sd_progress_cb_t cb, void* data);
-SD_API void sd_set_preview_callback(sd_preview_cb_t cb, preview_t mode, int interval);
+SD_API void sd_set_preview_callback(sd_preview_cb_t cb, preview_t mode, int interval, bool denoised, bool noisy);
 SD_API int32_t get_num_physical_cores();
 SD_API const char* sd_get_system_info();
 
diff --git a/util.cpp b/util.cpp
@@ -189,8 +189,10 @@ static sd_progress_cb_t sd_progress_cb = nullptr;
 void* sd_progress_cb_data              = nullptr;
 
 static sd_preview_cb_t sd_preview_cb = NULL;
-preview_t sd_preview_mode         = PREVIEW_NONE;
+preview_t sd_preview_mode            = PREVIEW_NONE;
 int sd_preview_interval              = 1;
+bool sd_preview_denoised             = true;
+bool sd_preview_noisy                = false;
 
 std::u32string utf8_to_utf32(const std::string& utf8_str) {
     std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> converter;
@@ -335,10 +337,12 @@ void sd_set_progress_callback(sd_progress_cb_t cb, void* data) {
     sd_progress_cb      = cb;
     sd_progress_cb_data = data;
 }
-void sd_set_preview_callback(sd_preview_cb_t cb, preview_t mode = PREVIEW_PROJ, int interval = 1) {
+void sd_set_preview_callback(sd_preview_cb_t cb, preview_t mode = PREVIEW_PROJ, int interval = 1, bool denoised = true, bool noisy = false) {
     sd_preview_cb       = cb;
     sd_preview_mode     = mode;
     sd_preview_interval = interval;
+    sd_preview_denoised = denoised;
+    sd_preview_noisy    = noisy;
 }
 
 sd_preview_cb_t sd_get_preview_callback() {
@@ -351,6 +355,12 @@ preview_t sd_get_preview_mode() {
 int sd_get_preview_interval() {
     return sd_preview_interval;
 }
+bool sd_should_preview_denoised() {
+    return sd_preview_denoised;
+}
+bool sd_should_preview_noisy() {
+    return sd_preview_noisy;
+}
 
 sd_progress_cb_t sd_get_progress_callback() {
     return sd_progress_cb;
diff --git a/util.h b/util.h
@@ -60,6 +60,8 @@ void* sd_get_progress_callback_data();
 sd_preview_cb_t sd_get_preview_callback();
 preview_t sd_get_preview_mode();
 int sd_get_preview_interval();
+bool sd_should_preview_denoised();
+bool sd_should_preview_noisy();
 
 #define LOG_DEBUG(format, ...) log_printf(SD_LOG_DEBUG, __FILE__, __LINE__, format, ##__VA_ARGS__)
 #define LOG_INFO(format, ...) log_printf(SD_LOG_INFO, __FILE__, __LINE__, format, ##__VA_ARGS__)