import torch from qwen_asr import Qwen3ASRModel model = Qwen3ASRModel.from_pretrained( "Qwen/Qwen3-ASR-1.7B", dtype=torch.bfloat16, device_map="cuda:0", # attn_implementation="flash_attention_2", max_inference_batch_size=32, # Batch size limit for inference. -1 means unlimited. Smaller values can help avoid OOM. max_new_tokens=256, # Maximum number of tokens to generate. Set a larger value for long audio input. ) results = model.transcribe( audio="https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen3-ASR-Repo/asr_en.wav", language=None, # set "English" to force the language ) print(results[0].language) print(results[0].text)