import base64 import datetime import io import multiprocessing import re from typing import AsyncGenerator from transformers import AutoTokenizer, AutoFeatureExtractor from PIL import Image from vllm import LLM, SamplingParams import time import torchaudio import numpy as np import os from decord import VideoReader, cpu import torch import asyncio from vllm import AsyncLLMEngine, AsyncEngineArgs, SamplingParams import shortuuid from vllm.utils import random_uuid import gradio as gr from collections import deque from queue import Empty import cv2 import json from web_demo.wakeup_and_vad.wakeup_and_vad import WakeupAndVAD from tencentcloud.common import credential from tencentcloud.common.profile.client_profile import ClientProfile from tencentcloud.common.profile.http_profile import HttpProfile from tencentcloud.common.exception.tencent_cloud_sdk_exception import TencentCloudSDKException from tencentcloud.tts.v20190823 import tts_client, models IMAGE_TOKEN_INDEX = 51000 AUDIO_TOKEN_INDEX = 51001 IMAGE_TOKEN = "" AUDIO_TOKEN = "