from transformers import pipeline
import torch

pipe = pipeline(
    "text-generation",
    model="ByteDance/Ouro-1.4B",
    trust_remote_code=True,
    device_map="auto",          # automatically place the model on all available GPUs
    torch_dtype=torch.float16,  # half precision to reduce GPU memory usage
)

messages = [
    {"role": "user", "content": "Introduce yourself"},
]

result = pipe(messages)
print(result)
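
If you only want the model's reply rather than the full pipeline output, a minimal sketch follows, assuming the usual chat-format return value of the text-generation pipeline (a list whose first element carries the whole conversation, including the new assistant turn, under "generated_text"):

# Sketch: pull out just the assistant's reply, assuming "generated_text"
# holds the chat history with the model's answer as the last message.
reply = result[0]["generated_text"][-1]["content"]
print(reply)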