Update deploy (#568)

deploy 部署相关环境更新

Update deploy (#568)
deploy 部署相关环境更新
9597967c · LiangLiu · GitHub · 14034d83 · 9597967c · 9597967c
Unverified Commit 9597967c authored Dec 04, 2025 by LiangLiu Committed by GitHub Dec 04, 2025
4 changed files
--- a/Dockerfile_deploy
+++ b/Dockerfile_deploy
@@ -11,9 +11,22 @@ RUN mkdir /workspace/LightX2V
 WORKDIR /workspace/LightX2V
 ENV PYTHONPATH=/workspace/LightX2V
+# for multi-person & animate
+RUN pip install ultralytics moviepy pydub pyannote.audio onnxruntime decord peft pandas matplotlib loguru sentencepiece
+RUN export COMMIT=0e78a118995e66bb27d78518c4bd9a3e95b4e266 \
+    && export TORCH_CUDA_ARCH_LIST="9.0" \
+    && git clone --depth 1 https://github.com/facebookresearch/sam2.git \
+    && cd sam2 \
+    && git fetch --depth 1 origin $COMMIT \
+    && git checkout $COMMIT \
+    && python setup.py install
+COPY tools tools
 COPY assets assets
 COPY configs configs
 COPY lightx2v lightx2v
 COPY lightx2v_kernel lightx2v_kernel
+COPY lightx2v_platform lightx2v_platform
 COPY --from=frontend_builder /opt/lightx2v/deploy/server/frontend/dist lightx2v/deploy/server/frontend/dist
--- a/lightx2v/deploy/common/audio_separator.py
+++ b/lightx2v/deploy/common/audio_separator.py
@@ -19,6 +19,13 @@ from loguru import logger
 # Import pyannote.audio for speaker diarization
 from pyannote.audio import Audio, Pipeline
+_origin_torch_load = torch.load  # keep a handle on the real torch.load so the temporary patch can be undone
+def our_torch_load(checkpoint_file, *args, **kwargs):  # stand-in for torch.load; forwards every argument unchanged except weights_only
+    kwargs["weights_only"] = False  # overrides any caller-supplied value. SECURITY: this allows full pickle deserialization, so only load trusted checkpoints
+    return _origin_torch_load(checkpoint_file, *args, **kwargs)
 class AudioSeparator:
    """
@@ -51,6 +58,7 @@ class AudioSeparator:
            model_name = model_path or "pyannote/speaker-diarization-community-1"
            try:
+                torch.load = our_torch_load
                # Try loading with token if available
                if huggingface_token:
                    self.pipeline = Pipeline.from_pretrained(model_name, token=huggingface_token)
@@ -61,6 +69,8 @@ class AudioSeparator:
                if "gated" in str(e).lower() or "token" in str(e).lower():
                    raise RuntimeError(f"Model requires authentication. Set HUGGINGFACE_TOKEN or HF_TOKEN environment variable: {e}")
                raise RuntimeError(f"Failed to load pyannote model: {e}")
+            finally:
+                torch.load = _origin_torch_load
            # Move pipeline to specified device
            if self.device:

--- a/lightx2v/deploy/server/__main__.py
+++ b/lightx2v/deploy/server/__main__.py
@@ -1352,7 +1352,7 @@ async def api_v1_face_detect(request: FaceDetectRequest, user=Depends(verify_use
            return error_response(f"Invalid image format: {str(e)}", 400)
        # Detect faces only (no cropping)
-        result = face_detector.detect_faces(image_bytes, return_image=False)
+        result = await asyncio.to_thread(face_detector.detect_faces, image_bytes, return_image=False)
        faces_data = []
        for i, face in enumerate(result["faces"]):
            faces_data.append(
@@ -1392,13 +1392,13 @@ async def api_v1_audio_separate(request: AudioSeparateRequest, user=Depends(veri
            return error_response(f"Invalid base64 audio data", 400)
        # Separate speakers
-        result = audio_separator.separate_speakers(audio_bytes, num_speakers=request.num_speakers)
+        result = await asyncio.to_thread(audio_separator.separate_speakers, audio_bytes, num_speakers=request.num_speakers)
        # Convert audio tensors to base64 strings (without saving to file)
        speakers_data = []
        for speaker in result["speakers"]:
            # Convert audio tensor directly to base64
-            audio_base64 = audio_separator.speaker_audio_to_base64(speaker["audio"], speaker["sample_rate"], format="wav")
+            audio_base64 = await asyncio.to_thread(audio_separator.speaker_audio_to_base64, speaker["audio"], speaker["sample_rate"], format="wav")
            speakers_data.append(
                {
                    "speaker_id": speaker["speaker_id"],

--- a/lightx2v_platform/base/base.py
+++ b/lightx2v_platform/base/base.py
+import os
 from loguru import logger
 from lightx2v_platform.base import global_var
@@ -21,6 +23,11 @@ def check_ai_device(platform="cuda"):
        raise RuntimeError(f"Unsupported platform: {platform}. Available platforms: {available_platforms}")
    is_available = platform_device.is_available()
    if not is_available:
-        raise RuntimeError(f"AI device for platform '{platform}' is not available. Please check your runtime environment.")
+        skip_platform_check = os.getenv("SKIP_PLATFORM_CHECK", "False") in ["1", "True"]
+        error_msg = f"AI device for platform '{platform}' is not available. Please check your runtime environment."
+        if skip_platform_check:
+            logger.warning(error_msg)
+            return True
+        raise RuntimeError(error_msg)
    logger.info(f"AI device for platform '{platform}' is available.")
    return True