implement LtxvEditVideo API node

bigcat88 · bigcat88 · commit 1e6313b15def · 2025-11-27T17:32:14.000+02:00
diff --git a/comfy_api_nodes/nodes_ltxv.py b/comfy_api_nodes/nodes_ltxv.py
@@ -5,14 +5,17 @@
 from pydantic import BaseModel, Field
 from typing_extensions import override
 
-from comfy_api.input_impl import VideoFromFile
-from comfy_api.latest import IO, ComfyExtension
+from comfy_api.latest import IO, ComfyExtension, Input, InputImpl
 from comfy_api_nodes.util import (
     ApiEndpoint,
     get_number_of_images,
     sync_op_raw,
     upload_images_to_comfyapi,
     validate_string,
+    validate_video_duration,
+    validate_video_dimensions,
+    validate_video_frame_count,
+    upload_video_to_comfyapi,
 )
 
 MODELS_MAP = {
@@ -31,6 +34,14 @@ class ExecuteTaskRequest(BaseModel):
     image_uri: Optional[str] = Field(None)
 
 
+class VideoEditRequest(BaseModel):
+    """Request body that EditVideoNode posts to the `/proxy/ltx/v1/retake` endpoint.
+
+    All fields are required; none has a default.
+    """
+
+    # URI of the source video (EditVideoNode passes the result of upload_video_to_comfyapi).
+    video_uri: str
+    # Text prompt describing the edit.
+    prompt: str
+    # Start of the edited section, as a whole number (EditVideoNode truncates its float input).
+    start_time: int
+    # Length of the edited section, as a whole number (EditVideoNode truncates its float input).
+    duration: int
+    # Edit mode; EditVideoNode sends "replace_video", "replace_audio" or "replace_audio_and_video".
+    mode: str
+
+
 class TextToVideoNode(IO.ComfyNode):
     @classmethod
     def define_schema(cls):
@@ -103,7 +114,7 @@ async def execute(
             as_binary=True,
             max_retries=1,
         )
-        return IO.NodeOutput(VideoFromFile(BytesIO(response)))
+        return IO.NodeOutput(InputImpl.VideoFromFile(BytesIO(response)))
 
 
 class ImageToVideoNode(IO.ComfyNode):
@@ -183,7 +194,76 @@ async def execute(
             as_binary=True,
             max_retries=1,
         )
-        return IO.NodeOutput(VideoFromFile(BytesIO(response)))
+        return IO.NodeOutput(InputImpl.VideoFromFile(BytesIO(response)))
+
+
+class EditVideoNode(IO.ComfyNode):
+    """API node that edits a section of a video via the `/proxy/ltx/v1/retake` endpoint."""
+
+    @classmethod
+    def define_schema(cls):
+        """Describe the node: its inputs, single video output and hidden auth fields."""
+        return IO.Schema(
+            node_id="LtxvApiEditVideoNode",
+            display_name="LTXV Video To Video",
+            category="api node/video/LTXV",
+            description="Edit a specific section of a video by replacing audio, video, or both using AI generation.",
+            inputs=[
+                IO.Video.Input("video"),
+                IO.String.Input(
+                    "prompt",
+                    multiline=True,
+                    default="",
+                ),
+                IO.Combo.Input("mode", options=["replace_video", "replace_audio", "replace_audio_and_video"]),
+                IO.Float.Input("start_time", min=0.0, default=0.0),
+                IO.Float.Input("duration", min=1.0, max=20.0, default=3),
+            ],
+            outputs=[
+                IO.Video.Output(),
+            ],
+            hidden=[
+                IO.Hidden.auth_token_comfy_org,
+                IO.Hidden.api_key_comfy_org,
+                IO.Hidden.unique_id,
+            ],
+            is_api_node=True,
+        )
+
+    @classmethod
+    async def execute(
+        cls,
+        video: Input.Video,
+        prompt: str,
+        mode: str,
+        start_time: float,
+        duration: float,
+    ) -> IO.NodeOutput:
+        """Validate the inputs, upload the video and request the edit.
+
+        Args:
+            video: Source video; checked against maximums of 3840x2160, 20 s and 505 frames.
+            prompt: Text prompt; validated with min_length=1 and max_length=10000.
+            mode: Edit mode, forwarded unchanged to the request.
+            start_time: Start of the edited section; must be less than the video duration.
+            duration: Length of the edited section (presumably seconds, like start_time).
+
+        Returns:
+            Node output wrapping a video built from the binary response.
+
+        Raises:
+            ValueError: If `start_time` is not strictly less than the video duration.
+                The `validate_*` helpers are also expected to raise when a limit is violated.
+        """
+        validate_string(prompt, min_length=1, max_length=10000)
+        validate_video_dimensions(video, max_width=3840, max_height=2160)
+        validate_video_duration(video, max_duration=20)
+        validate_video_frame_count(video, max_frame_count=505)
+        video_duration = video.get_duration()
+        if start_time >= video_duration:
+            raise ValueError(
+                f"Invalid start_time ({start_time}). "
+                f"Start time must be less than the input video duration ({video_duration})"
+            )
+        # Upload only after all local validation has passed.
+        video_uri = await upload_video_to_comfyapi(cls, video)
+        # VideoEditRequest declares start_time/duration as int, so int() drops any fractional part here.
+        request = VideoEditRequest(
+            video_uri=video_uri,
+            prompt=prompt,
+            mode=mode,
+            start_time=int(start_time),
+            duration=int(duration),
+        )
+        response = await sync_op_raw(
+            cls,
+            ApiEndpoint("/proxy/ltx/v1/retake", "POST"),
+            data=request,
+            as_binary=True,
+            max_retries=1,
+        )
+        return IO.NodeOutput(InputImpl.VideoFromFile(BytesIO(response)))
 
 
 class LtxvApiExtension(ComfyExtension):
@@ -192,6 +272,7 @@ async def get_node_list(self) -> list[type[IO.ComfyNode]]:
         return [
             TextToVideoNode,
             ImageToVideoNode,
+            EditVideoNode,
         ]
 
 
diff --git a/comfy_api_nodes/util/__init__.py b/comfy_api_nodes/util/__init__.py
@@ -47,6 +47,7 @@
     validate_string,
     validate_video_dimensions,
     validate_video_duration,
+    validate_video_frame_count,
 )
 
 __all__ = [
@@ -94,6 +95,7 @@
     "validate_string",
     "validate_video_dimensions",
     "validate_video_duration",
+    "validate_video_frame_count",
     # Misc functions
     "get_fs_object_size",
 ]
diff --git a/comfy_api_nodes/util/validation_utils.py b/comfy_api_nodes/util/validation_utils.py
@@ -1,9 +1,7 @@
 import logging
-from typing import Optional
 
 import torch
 
-from comfy_api.input.video_types import VideoInput
 from comfy_api.latest import Input
 
 
@@ -18,10 +16,10 @@ def get_image_dimensions(image: torch.Tensor) -> tuple[int, int]:
 
 def validate_image_dimensions(
     image: torch.Tensor,
-    min_width: Optional[int] = None,
-    max_width: Optional[int] = None,
-    min_height: Optional[int] = None,
-    max_height: Optional[int] = None,
+    min_width: int | None = None,
+    max_width: int | None = None,
+    min_height: int | None = None,
+    max_height: int | None = None,
 ):
     height, width = get_image_dimensions(image)
 
@@ -37,8 +35,8 @@ def validate_image_dimensions(
 
 def validate_image_aspect_ratio(
     image: torch.Tensor,
-    min_ratio: Optional[tuple[float, float]] = None,  # e.g. (1, 4)
-    max_ratio: Optional[tuple[float, float]] = None,  # e.g. (4, 1)
+    min_ratio: tuple[float, float] | None = None,  # e.g. (1, 4)
+    max_ratio: tuple[float, float] | None = None,  # e.g. (4, 1)
     *,
     strict: bool = True,  # True -> (min, max); False -> [min, max]
 ) -> float:
@@ -84,8 +82,8 @@ def validate_images_aspect_ratio_closeness(
 
 def validate_aspect_ratio_string(
     aspect_ratio: str,
-    min_ratio: Optional[tuple[float, float]] = None,  # e.g. (1, 4)
-    max_ratio: Optional[tuple[float, float]] = None,  # e.g. (4, 1)
+    min_ratio: tuple[float, float] | None = None,  # e.g. (1, 4)
+    max_ratio: tuple[float, float] | None = None,  # e.g. (4, 1)
     *,
     strict: bool = False,  # True -> (min, max); False -> [min, max]
 ) -> float:
@@ -97,10 +95,10 @@ def validate_aspect_ratio_string(
 
 def validate_video_dimensions(
     video: Input.Video,
-    min_width: Optional[int] = None,
-    max_width: Optional[int] = None,
-    min_height: Optional[int] = None,
-    max_height: Optional[int] = None,
+    min_width: int | None = None,
+    max_width: int | None = None,
+    min_height: int | None = None,
+    max_height: int | None = None,
 ):
     try:
         width, height = video.get_dimensions()
@@ -120,8 +118,8 @@ def validate_video_dimensions(
 
 def validate_video_duration(
     video: Input.Video,
-    min_duration: Optional[float] = None,
-    max_duration: Optional[float] = None,
+    min_duration: float | None = None,
+    max_duration: float | None = None,
 ):
     try:
         duration = video.get_duration()
@@ -136,6 +134,23 @@ def validate_video_duration(
         raise ValueError(f"Video duration must be at most {max_duration}s, got {duration}s")
 
 
+def validate_video_frame_count(
+    video: Input.Video,
+    min_frame_count: int | None = None,
+    max_frame_count: int | None = None,
+):
+    """Check that the video's frame count lies within the optional inclusive bounds.
+
+    A bound left as None is not checked. If the frame count cannot be read,
+    the error is logged and the function returns without validating.
+
+    Raises:
+        ValueError: If the frame count is below `min_frame_count` or above `max_frame_count`.
+    """
+    try:
+        n_frames = video.get_frame_count()
+    except Exception as exc:
+        # Best-effort: an unreadable frame count skips validation instead of failing the node.
+        logging.error("Error getting frame count of video: %s", exc)
+        return
+
+    has_lower_bound = min_frame_count is not None
+    if has_lower_bound and n_frames < min_frame_count:
+        raise ValueError(f"Video frame count must be at least {min_frame_count}, got {n_frames}")
+
+    has_upper_bound = max_frame_count is not None
+    if has_upper_bound and n_frames > max_frame_count:
+        raise ValueError(f"Video frame count must be at most {max_frame_count}, got {n_frames}")
+
+
 def get_number_of_images(images):
     if isinstance(images, torch.Tensor):
         return images.shape[0] if images.ndim >= 4 else 1
@@ -144,8 +159,8 @@ def get_number_of_images(images):
 
 def validate_audio_duration(
     audio: Input.Audio,
-    min_duration: Optional[float] = None,
-    max_duration: Optional[float] = None,
+    min_duration: float | None = None,
+    max_duration: float | None = None,
 ) -> None:
     sr = int(audio["sample_rate"])
     dur = int(audio["waveform"].shape[-1]) / sr
@@ -177,7 +192,7 @@ def validate_string(
         )
 
 
-def validate_container_format_is_mp4(video: VideoInput) -> None:
+def validate_container_format_is_mp4(video: Input.Video) -> None:
     """Validates video container format is MP4."""
     container_format = video.get_container_format()
     if container_format not in ["mp4", "mov,mp4,m4a,3gp,3g2,mj2"]:
@@ -194,8 +209,8 @@ def _ratio_from_tuple(r: tuple[float, float]) -> float:
 def _assert_ratio_bounds(
     ar: float,
     *,
-    min_ratio: Optional[tuple[float, float]] = None,
-    max_ratio: Optional[tuple[float, float]] = None,
+    min_ratio: tuple[float, float] | None = None,
+    max_ratio: tuple[float, float] | None = None,
     strict: bool = True,
 ) -> None:
     """Validate a numeric aspect ratio against optional min/max ratio bounds."""

Original file line number	Diff line number	Diff line change
`@@ -47,6 +47,7 @@`
`47`	`47`	`validate_string,`
`48`	`48`	`validate_video_dimensions,`
`49`	`49`	`validate_video_duration,`
	`50`	`+ validate_video_frame_count,`
`50`	`51`	`)`
`51`	`52`
`52`	`53`	`__all__ = [`
`@@ -94,6 +95,7 @@`
`94`	`95`	`"validate_string",`
`95`	`96`	`"validate_video_dimensions",`
`96`	`97`	`"validate_video_duration",`
	`98`	`+ "validate_video_frame_count",`
`97`	`99`	`# Misc functions`
`98`	`100`	`"get_fs_object_size",`
`99`	`101`	`]`