"torchvision/vscode:/vscode.git/clone" did not exist on "6334466ef9b41d503d46ac7e519a0ad889b53f1e"
test_batchify.py 1.28 KB
Newer Older
zzg_666's avatar
zzg_666 committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
from dataflow.operators.core_text import PromptedGenerator
from dataflow.serving import LocalModelLLMServing_sglang, LocalModelLLMServing_vllm
from dataflow.utils.storage import FileStorage
from dataflow.wrapper import BatchWrapper

if __name__ == "__main__":
    # Demo entry point: feed the "raw_content" field of a translation JSONL
    # file through a prompted LLM operator wrapped in a BatchWrapper.

    # --- Storage -----------------------------------------------------------
    # An earlier input location, kept for reference:
    #   "../example_data/GeneralTextPipeline/translation.jsonl"
    input_jsonl = "./dataflow/example/GeneralTextPipeline/translation.jsonl"
    cache_dir = "./cache/temp0_2_topp0_9"
    translation_storage = FileStorage(
        first_entry_file_name=input_jsonl,
        cache_path=cache_dir,
        file_name_prefix="translation",
        cache_type="json",
    )

    # --- Model serving -----------------------------------------------------
    # The sglang backend is used here. The vLLM backend was the alternative
    # tried at this spot, built from the same checkpoint:
    #   LocalModelLLMServing_vllm(hf_model_name_or_path=model_dir)
    model_dir = "/data0/public_models/Qwen2.5-VL-7B-Instruct"
    serving_backend = LocalModelLLMServing_sglang(
        hf_model_name_or_path=model_dir,
        sgl_dp_size=1,  # data parallel size
        sgl_tp_size=1,  # tensor parallel size
        sgl_mem_fraction_static=0.8,
    )

    # --- Operator ----------------------------------------------------------
    # The system prompt (in Chinese) asks the model to translate everything
    # that follows into Chinese and not to continue the text.
    translate_prompt = "请将后续内容都翻译成中文,不要续写。:\n"
    translator = PromptedGenerator(
        llm_serving=serving_backend,
        system_prompt=translate_prompt,
    )

    # --- Batched execution ---------------------------------------------------
    # Wrap the operator with a batch size of 3 and batch caching switched on
    # (exact caching semantics are defined by BatchWrapper, not shown here).
    batched_translator = BatchWrapper(translator, batch_size=3, batch_cache=True)
    batched_translator.run(
        storage=translation_storage.step(),
        input_key="raw_content",
    )