#!/bin/bash

# Stage-2: Prompt a pretrained language model to generate the corresponding response.
# The input consists of the prompts, the current dialogue context, and the knowledge generated in Stage-1.
# The output is the corresponding response.
# The pretrained language model has 357M parameters.

# Value handed to --nproc_per_node below.
WORLD_SIZE=8

# Options for the launcher, accumulated into one space-separated string.
# The launch command expands this string unquoted so that it word-splits
# into individual arguments; keep it a plain string, not an array.
DISTRIBUTED_ARGS="--nproc_per_node ${WORLD_SIZE}"
DISTRIBUTED_ARGS+=" --nnodes 1"
DISTRIBUTED_ARGS+=" --node_rank 0"
DISTRIBUTED_ARGS+=" --master_addr localhost"
DISTRIBUTED_ARGS+=" --master_port 6000"

# Filesystem locations consumed by the generation command below.
# Replace each <...> placeholder with a real path before running.
# The values are quoted so that the angle brackets are not parsed as shell
# redirections, and so that substituted paths containing spaces stay intact.
CHECKPOINT_PATH="<PATH_OF_THE_LANGUAGE_MODEL>"          # passed to --load
INPUT_PATH="<PATH_OF_THE_INPUT_TEST_DATA_FILE>"         # passed to --sample-input-file
VOCAB_PATH="<PATH_OF_THE_VOCAB_FILE>"                   # passed to --vocab-file
MERGE_PATH="<PATH_OF_THE_MERGE_FILE>"                   # passed to --merge-file
OUTPUT_PATH="<PATH_OF_THE_OUTPUT_GENERATION_FILE>"      # passed to --sample-output-file
PROMPT_PATH="<PATH_OF_THE_RESPONSE_GENERATION_PROMPTS>" # passed to --prompt-file

# Run ./tasks/main.py through torch.distributed.launch with the
# KNWL-DIALO-PROMPT task and prompt type "response".
#
# $DISTRIBUTED_ARGS is deliberately left unquoted: it is a single string that
# must word-split into separate launcher options. Every path variable is
# quoted so that values containing spaces or glob characters reach the
# program as one argument each.
#
# The command must stay one unbroken chain of backslash continuations; any
# line inserted between the options terminates the command early.
# shellcheck disable=SC2086
python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/main.py \
        --num-layers 24 \
        --hidden-size 1024 \
        --num-attention-heads 16 \
        --seq-length 2048 \
        --max-position-embeddings 2048 \
        --micro-batch-size 1 \
        --vocab-file "${VOCAB_PATH}" \
        --merge-file "${MERGE_PATH}" \
        --load "${CHECKPOINT_PATH}" \
        --fp16 \
        --DDP-impl torch \
        --tokenizer-type GPT2BPETokenizer \
        --sample-input-file "${INPUT_PATH}" \
        --sample-output-file "${OUTPUT_PATH}" \
        --prompt-file "${PROMPT_PATH}" \
        --prompt-type response \
        --num-prompt-examples 20 \
        --task KNWL-DIALO-PROMPT