Commit b8707ee2 authored by zihanl
Browse files

change directory name to msdp

parent 4e48efdf
# Multi-Stage Prompting for Knowledgeable Dialogue Generation # Multi-Stage Prompting for Knowledgeable Dialogue Generation
This directory contains all the scripts of multi-stage prompting for knowledgeable dialogue generation that includes data preparation, and knowledge and response generations. More details are available on [`knowledgeable task directory`](../../tasks/knwl_dialo). This directory contains all the scripts of multi-stage prompting for knowledgeable dialogue generation that includes data preparation, and knowledge and response generations. More details are available on [`knowledgeable task directory`](../../tasks/msdp).
...@@ -13,13 +13,13 @@ WOI_DATA_FOLDER=<PATH_OF_WIZARD_OF_INTERNET_DATA_FOLDER> ...@@ -13,13 +13,13 @@ WOI_DATA_FOLDER=<PATH_OF_WIZARD_OF_INTERNET_DATA_FOLDER>
# We provide examples for processing the raw data from Wizard of Wikipedia # We provide examples for processing the raw data from Wizard of Wikipedia
# Processing the train dataset (train.json) # Processing the train dataset (train.json)
python ${DIR}/tasks/knwl_dialo/preprocessing.py \ python ${DIR}/tasks/msdp/preprocessing.py \
--func process_wow_dataset \ --func process_wow_dataset \
--raw_file ${WOW_DATA_FOLDER}/train.json \ --raw_file ${WOW_DATA_FOLDER}/train.json \
--processed_file ${WOW_DATA_FOLDER}/train_processed.txt --processed_file ${WOW_DATA_FOLDER}/train_processed.txt
# Processing test seen dataset (test_random_split.json) # Processing test seen dataset (test_random_split.json)
python ${DIR}/tasks/knwl_dialo/preprocessing.py \ python ${DIR}/tasks/msdp/preprocessing.py \
--func process_wow_dataset \ --func process_wow_dataset \
--raw_file ${WOW_DATA_FOLDER}/test_random_split.json \ --raw_file ${WOW_DATA_FOLDER}/test_random_split.json \
--processed_file ${WOW_DATA_FOLDER}/testseen_processed.txt \ --processed_file ${WOW_DATA_FOLDER}/testseen_processed.txt \
...@@ -27,7 +27,7 @@ python ${DIR}/tasks/knwl_dialo/preprocessing.py \ ...@@ -27,7 +27,7 @@ python ${DIR}/tasks/knwl_dialo/preprocessing.py \
--resp_ref_file ${WOW_DATA_FOLDER}/output_testseen_response_reference.txt --resp_ref_file ${WOW_DATA_FOLDER}/output_testseen_response_reference.txt
# processing test unseen dataset (test_topic_split.json) # processing test unseen dataset (test_topic_split.json)
python ${DIR}/tasks/knwl_dialo/preprocessing.py \ python ${DIR}/tasks/msdp/preprocessing.py \
--func process_wow_dataset \ --func process_wow_dataset \
--raw_file ${WOW_DATA_FOLDER}/test_topic_split.json \ --raw_file ${WOW_DATA_FOLDER}/test_topic_split.json \
--processed_file ${WOW_DATA_FOLDER}/testunseen_processed.txt \ --processed_file ${WOW_DATA_FOLDER}/testunseen_processed.txt \
...@@ -37,7 +37,7 @@ python ${DIR}/tasks/knwl_dialo/preprocessing.py \ ...@@ -37,7 +37,7 @@ python ${DIR}/tasks/knwl_dialo/preprocessing.py \
# We provide the following script to process the raw data from Wizard of Internet # We provide the following script to process the raw data from Wizard of Internet
# Processing the test dataset (test.jsonl) # Processing the test dataset (test.jsonl)
python ${DIR}/tasks/knwl_dialo/preprocessing.py \ python ${DIR}/tasks/msdp/preprocessing.py \
--func process_woi_dataset \ --func process_woi_dataset \
--raw_file ${WOI_DATA_FOLDER}/test.jsonl \ --raw_file ${WOI_DATA_FOLDER}/test.jsonl \
--processed_file ${WOI_DATA_FOLDER}/test_processed.txt \ --processed_file ${WOI_DATA_FOLDER}/test_processed.txt \
...@@ -48,7 +48,7 @@ python ${DIR}/tasks/knwl_dialo/preprocessing.py \ ...@@ -48,7 +48,7 @@ python ${DIR}/tasks/knwl_dialo/preprocessing.py \
# Get the knowledge generation prompts for each test dataset in WoW and WoI # Get the knowledge generation prompts for each test dataset in WoW and WoI
MODEL_FILE=<PATH_OF_THE_FINETUNED_DPR_MODEL> MODEL_FILE=<PATH_OF_THE_FINETUNED_DPR_MODEL>
# WoW test seen # WoW test seen
python ${DIR}/tasks/knwl_dialo/preprocessing.py \ python ${DIR}/tasks/msdp/preprocessing.py \
--func get_knwl_gen_prompts \ --func get_knwl_gen_prompts \
--test_file ${WOW_DATA_FOLDER}/testseen_processed.txt \ --test_file ${WOW_DATA_FOLDER}/testseen_processed.txt \
--train_file ${WOW_DATA_FOLDER}/train_processed.txt \ --train_file ${WOW_DATA_FOLDER}/train_processed.txt \
...@@ -57,7 +57,7 @@ python ${DIR}/tasks/knwl_dialo/preprocessing.py \ ...@@ -57,7 +57,7 @@ python ${DIR}/tasks/knwl_dialo/preprocessing.py \
--data_type wow_seen --data_type wow_seen
# WoW test unseen # WoW test unseen
python ${DIR}/tasks/knwl_dialo/preprocessing.py \ python ${DIR}/tasks/msdp/preprocessing.py \
--func get_knwl_gen_prompts \ --func get_knwl_gen_prompts \
--test_file ${WOW_DATA_FOLDER}/testunseen_processed.txt \ --test_file ${WOW_DATA_FOLDER}/testunseen_processed.txt \
--train_file ${WOW_DATA_FOLDER}/train_processed.txt \ --train_file ${WOW_DATA_FOLDER}/train_processed.txt \
...@@ -66,7 +66,7 @@ python ${DIR}/tasks/knwl_dialo/preprocessing.py \ ...@@ -66,7 +66,7 @@ python ${DIR}/tasks/knwl_dialo/preprocessing.py \
--data_type wow_unseen --data_type wow_unseen
# WoI # WoI
python ${DIR}/tasks/knwl_dialo/preprocessing.py \ python ${DIR}/tasks/msdp/preprocessing.py \
--func get_knwl_gen_prompts \ --func get_knwl_gen_prompts \
--test_file ${WOI_DATA_FOLDER}/test_processed.txt \ --test_file ${WOI_DATA_FOLDER}/test_processed.txt \
--train_file ${WOW_DATA_FOLDER}/train_processed.txt \ --train_file ${WOW_DATA_FOLDER}/train_processed.txt \
...@@ -76,7 +76,7 @@ python ${DIR}/tasks/knwl_dialo/preprocessing.py \ ...@@ -76,7 +76,7 @@ python ${DIR}/tasks/knwl_dialo/preprocessing.py \
# Get the response generation prompts (can be applied for all the test datasets) # Get the response generation prompts (can be applied for all the test datasets)
python ${DIR}/tasks/knwl_dialo/preprocessing.py \ python ${DIR}/tasks/msdp/preprocessing.py \
--func get_resp_gen_prompts \ --func get_resp_gen_prompts \
--train_file ${WOW_DATA_FOLDER}/train_processed.txt \ --train_file ${WOW_DATA_FOLDER}/train_processed.txt \
--processed_file ${WOW_DATA_FOLDER}/output_response_prompts.txt --processed_file ${WOW_DATA_FOLDER}/output_response_prompts.txt
......
...@@ -16,14 +16,14 @@ MODEL_GEN_PATH=<PATH_OF_THE_KNOWLEDGE_GENERATION> \ ...@@ -16,14 +16,14 @@ MODEL_GEN_PATH=<PATH_OF_THE_KNOWLEDGE_GENERATION> \
GROUND_TRUTH_PATH=<PATH_OF_THE_GROUND_TRUTH_KNOWLEDGE> \ GROUND_TRUTH_PATH=<PATH_OF_THE_GROUND_TRUTH_KNOWLEDGE> \
(e.g., /testseen_knowledge_reference.txt) (e.g., /testseen_knowledge_reference.txt)
python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/main.py \ python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/msdp/main.py \
--num-layers 24 \ --num-layers 24 \
--hidden-size 1024 \ --hidden-size 1024 \
--num-attention-heads 16 \ --num-attention-heads 16 \
--seq-length 2048 \ --seq-length 2048 \
--max-position-embeddings 2048 \ --max-position-embeddings 2048 \
--micro-batch-size 4 \ --micro-batch-size 4 \
--task KNWL-DIALO-EVAL-F1 \ --task MSDP-EVAL-F1 \
--guess-file ${MODEL_GEN_PATH} \ --guess-file ${MODEL_GEN_PATH} \
--answer-file ${GROUND_TRUTH_PATH} --answer-file ${GROUND_TRUTH_PATH}
......
...@@ -16,14 +16,14 @@ MODEL_GEN_PATH=<PATH_OF_THE_RESPONSE_GENERATION> \ ...@@ -16,14 +16,14 @@ MODEL_GEN_PATH=<PATH_OF_THE_RESPONSE_GENERATION> \
GROUND_TRUTH_PATH=<PATH_OF_THE_GROUND_TRUTH_RESPONSE> \ GROUND_TRUTH_PATH=<PATH_OF_THE_GROUND_TRUTH_RESPONSE> \
(e.g., /testseen_response_reference.txt) (e.g., /testseen_response_reference.txt)
python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/main.py \ python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/msdp/main.py \
--num-layers 24 \ --num-layers 24 \
--hidden-size 1024 \ --hidden-size 1024 \
--num-attention-heads 16 \ --num-attention-heads 16 \
--seq-length 2048 \ --seq-length 2048 \
--max-position-embeddings 2048 \ --max-position-embeddings 2048 \
--micro-batch-size 4 \ --micro-batch-size 4 \
--task KNWL-DIALO-EVAL-F1 \ --task MSDP-EVAL-F1 \
--guess-file ${MODEL_GEN_PATH} \ --guess-file ${MODEL_GEN_PATH} \
--answer-file ${GROUND_TRUTH_PATH} --answer-file ${GROUND_TRUTH_PATH}
...@@ -37,14 +37,14 @@ MODEL_GEN_PATH=<PATH_OF_THE_RESPONSE_GENERATION> \ ...@@ -37,14 +37,14 @@ MODEL_GEN_PATH=<PATH_OF_THE_RESPONSE_GENERATION> \
GROUND_TRUTH_PATH=<PATH_OF_THE_GROUND_TRUTH_KNOWLEDGE> \ GROUND_TRUTH_PATH=<PATH_OF_THE_GROUND_TRUTH_KNOWLEDGE> \
(e.g., /testseen_knowledge_reference.txt) (e.g., /testseen_knowledge_reference.txt)
python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/main.py \ python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/msdp/main.py \
--num-layers 24 \ --num-layers 24 \
--hidden-size 1024 \ --hidden-size 1024 \
--num-attention-heads 16 \ --num-attention-heads 16 \
--seq-length 2048 \ --seq-length 2048 \
--max-position-embeddings 2048 \ --max-position-embeddings 2048 \
--micro-batch-size 4 \ --micro-batch-size 4 \
--task KNWL-DIALO-EVAL-F1 \ --task MSDP-EVAL-F1 \
--guess-file ${MODEL_GEN_PATH} \ --guess-file ${MODEL_GEN_PATH} \
--answer-file ${GROUND_TRUTH_PATH} --answer-file ${GROUND_TRUTH_PATH}
......
...@@ -11,7 +11,7 @@ KNOWLEDGE_FILE=<PATH_OF_GENERATED_KNOWLEDGE_DATA> \ ...@@ -11,7 +11,7 @@ KNOWLEDGE_FILE=<PATH_OF_GENERATED_KNOWLEDGE_DATA> \
PROCESSED_FILE=<PATH_OF_INPUT_FILE_FOR_RESPONSE_GENERATION> \ PROCESSED_FILE=<PATH_OF_INPUT_FILE_FOR_RESPONSE_GENERATION> \
(e.g., /testseen_processed_with_generated_knowledge.txt) (e.g., /testseen_processed_with_generated_knowledge.txt)
python ${DIR}/tasks/knwl_dialo/preprocessing.py \ python ${DIR}/tasks/msdp/preprocessing.py \
--func prepare_input \ --func prepare_input \
--test_file ${TEST_FILE} \ --test_file ${TEST_FILE} \
--knowledge_gen_file ${KNOWLEDGE_FILE} \ --knowledge_gen_file ${KNOWLEDGE_FILE} \
......
...@@ -22,7 +22,7 @@ PROMPT_PATH=<PATH_OF_KNOWLEDGE_GENERATION_PROMPTS> \ ...@@ -22,7 +22,7 @@ PROMPT_PATH=<PATH_OF_KNOWLEDGE_GENERATION_PROMPTS> \
OUTPUT_PATH=<PATH_OF_OUTPUT_GENERATION_FILE> \ OUTPUT_PATH=<PATH_OF_OUTPUT_GENERATION_FILE> \
(e.g., /testseen_knowledge_generations.txt) (e.g., /testseen_knowledge_generations.txt)
python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/main.py \ python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/msdp/main.py \
--num-layers 24 \ --num-layers 24 \
--hidden-size 1024 \ --hidden-size 1024 \
--num-attention-heads 16 \ --num-attention-heads 16 \
...@@ -40,7 +40,7 @@ python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/main.py \ ...@@ -40,7 +40,7 @@ python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/main.py \
--prompt-file ${PROMPT_PATH} \ --prompt-file ${PROMPT_PATH} \
--prompt-type knowledge \ --prompt-type knowledge \
--num-prompt-examples 10 \ --num-prompt-examples 10 \
--task KNWL-DIALO-PROMPT --task MSDP-PROMPT
# NOTE: If you use api for the model generation, please use # NOTE: If you use api for the model generation, please use
# the "--api-prompt" flag (setting this value as True). # the "--api-prompt" flag (setting this value as True).
...@@ -22,7 +22,7 @@ PROMPT_PATH=<PATH_OF_RESPONSE_GENERATION_PROMPTS> \ ...@@ -22,7 +22,7 @@ PROMPT_PATH=<PATH_OF_RESPONSE_GENERATION_PROMPTS> \
OUTPUT_PATH=<PATH_OF_OUTPUT_GENERATION_FILE> \ OUTPUT_PATH=<PATH_OF_OUTPUT_GENERATION_FILE> \
(e.g., /output_testseen_response_generations.txt) (e.g., /output_testseen_response_generations.txt)
python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/main.py \ python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/msdp/main.py \
--num-layers 24 \ --num-layers 24 \
--hidden-size 1024 \ --hidden-size 1024 \
--num-attention-heads 16 \ --num-attention-heads 16 \
...@@ -40,7 +40,7 @@ python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/main.py \ ...@@ -40,7 +40,7 @@ python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/main.py \
--prompt-file ${PROMPT_PATH} \ --prompt-file ${PROMPT_PATH} \
--prompt-type response \ --prompt-type response \
--num-prompt-examples 20 \ --num-prompt-examples 20 \
--task KNWL-DIALO-PROMPT --task MSDP-PROMPT
# NOTE: If you use api for the model generation, please use # NOTE: If you use api for the model generation, please use
# the "--api-prompt" flag (setting this value as True). # the "--api-prompt" flag (setting this value as True).
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment