Skip to content

Commit 98ddc72

Browse files
committed
Updates
1 parent 2a019eb commit 98ddc72

File tree

1 file changed

+10
-13
lines changed

1 file changed

+10
-13
lines changed

demos/the_narrator_demo/main.py

Lines changed: 10 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
from transformers import AutoProcessor
1818
from optimum.intel.openvino import OVModelForVisualCausalLM
1919
from transformers import LlavaNextVideoProcessor
20-
from huggingface_hub import login
2120
from optimum.intel import OVWeightQuantizationConfig, OVPipelineQuantizationConfig
2221
from PIL import Image
2322
import tempfile
@@ -309,8 +308,16 @@ def run(video_path: str, model_name: str, flip: bool = True, video_input: bool =
309308
device_mapping = utils.available_devices(exclude=["NPU"])
310309
device_type = "AUTO"
311310

311+
# Download and convert Image and Video models
312312
vision_model, text_decoder, processor = load_models(model_name, device_type)
313313

314+
#For video captioning
315+
model_name_video = "llava-hf/LLaVA-NeXT-Video-7B-hf"
316+
device_type_video = "AUTO"
317+
318+
# Load video input model and processor
319+
model_video, processor_video = load_llava_video_models(model_name_video, device_type_video)
320+
314321
# initialize video player to deliver frames
315322
if isinstance(video_path, str) and video_path.isnumeric():
316323
video_path = int(video_path)
@@ -461,24 +468,14 @@ def run(video_path: str, model_name: str, flip: bool = True, video_input: bool =
461468
caption = "Switching to image_input mode..."
462469
else:
463470
print("Switching to video_input mode...")
464-
model_name = "llava-hf/LLaVA-NeXT-Video-7B-hf"
465-
device_type = "AUTO"
466-
467-
# Stop current worker
468-
global_stop_event.set()
469-
worker.join(timeout=1)
470-
global_stop_event.clear()
471-
472-
# Load video input model and processor
473-
model, processor = load_llava_video_models(model_name, device_type)
474471

475472
# Start new inference worker with video_input=True
476473
worker = threading.Thread(
477474
target=inference_worker,
478475
kwargs={
479476
"video_input": True,
480-
"model": model,
481-
"processor": processor,
477+
"model": model_video,
478+
"processor": processor_video,
482479
"vision_model": None,
483480
"text_decoder": None
484481
},

0 commit comments

Comments
 (0)