Commit b8707ee2 authored by zihanl's avatar zihanl
Browse files

change directory name to msdp

parent 4e48efdf
# Multi-Stage Prompting for Knowledgeable Dialogue Generation
This directory contains all the scripts of multi-stage prompting for knowledgeable dialogue generation that includes data preparation, and knowledge and response generations. More details are available on [`knowledgeable task directory`](../../tasks/knwl_dialo).
This directory contains all the scripts of multi-stage prompting for knowledgeable dialogue generation that includes data preparation, and knowledge and response generations. More details are available on [`knowledgeable task directory`](../../tasks/msdp).
......@@ -13,13 +13,13 @@ WOI_DATA_FOLDER=<PATH_OF_WIZARD_OF_INTERNET_DATA_FOLDER>
# We provide examples for processing the raw data from Wizard of Wikipedia
# Processing the train dataset (train.json)
python ${DIR}/tasks/knwl_dialo/preprocessing.py \
python ${DIR}/tasks/msdp/preprocessing.py \
--func process_wow_dataset \
--raw_file ${WOW_DATA_FOLDER}/train.json \
--processed_file ${WOW_DATA_FOLDER}/train_processed.txt
# Processing test seen dataset (test_random_split.json)
python ${DIR}/tasks/knwl_dialo/preprocessing.py \
python ${DIR}/tasks/msdp/preprocessing.py \
--func process_wow_dataset \
--raw_file ${WOW_DATA_FOLDER}/test_random_split.json \
--processed_file ${WOW_DATA_FOLDER}/testseen_processed.txt \
......@@ -27,7 +27,7 @@ python ${DIR}/tasks/knwl_dialo/preprocessing.py \
--resp_ref_file ${WOW_DATA_FOLDER}/output_testseen_response_reference.txt
# processing test unseen dataset (test_topic_split.json)
python ${DIR}/tasks/knwl_dialo/preprocessing.py \
python ${DIR}/tasks/msdp/preprocessing.py \
--func process_wow_dataset \
--raw_file ${WOW_DATA_FOLDER}/test_topic_split.json \
--processed_file ${WOW_DATA_FOLDER}/testunseen_processed.txt \
......@@ -37,7 +37,7 @@ python ${DIR}/tasks/knwl_dialo/preprocessing.py \
# We provide the following script to process the raw data from Wizard of Internet
# Processing the test dataset (test.jsonl)
python ${DIR}/tasks/knwl_dialo/preprocessing.py \
python ${DIR}/tasks/msdp/preprocessing.py \
--func process_woi_dataset \
--raw_file ${WOI_DATA_FOLDER}/test.jsonl \
--processed_file ${WOI_DATA_FOLDER}/test_processed.txt \
......@@ -48,7 +48,7 @@ python ${DIR}/tasks/knwl_dialo/preprocessing.py \
# Get the knowledge generation prompts for the each test dataset in WoW and WoI
MODEL_FILE=<PATH_OF_THE_FINETUNED_DPR_MODEL>
# WoW test seen
python ${DIR}/tasks/knwl_dialo/preprocessing.py \
python ${DIR}/tasks/msdp/preprocessing.py \
--func get_knwl_gen_prompts \
--test_file ${WOW_DATA_FOLDER}/testseen_processed.txt \
--train_file ${WOW_DATA_FOLDER}/train_processed.txt \
......@@ -57,7 +57,7 @@ python ${DIR}/tasks/knwl_dialo/preprocessing.py \
--data_type wow_seen
# WoW test unseen
python ${DIR}/tasks/knwl_dialo/preprocessing.py \
python ${DIR}/tasks/msdp/preprocessing.py \
--func get_knwl_gen_prompts \
--test_file ${WOW_DATA_FOLDER}/testunseen_processed.txt \
--train_file ${WOW_DATA_FOLDER}/train_processed.txt \
......@@ -66,7 +66,7 @@ python ${DIR}/tasks/knwl_dialo/preprocessing.py \
--data_type wow_unseen
# WoI
python ${DIR}/tasks/knwl_dialo/preprocessing.py \
python ${DIR}/tasks/msdp/preprocessing.py \
--func get_knwl_gen_prompts \
--test_file ${WOI_DATA_FOLDER}/test_processed.txt \
--train_file ${WOW_DATA_FOLDER}/train_processed.txt \
......@@ -76,7 +76,7 @@ python ${DIR}/tasks/knwl_dialo/preprocessing.py \
# Get the response generation prompts (can be applied for all the test datasets)
python ${DIR}/tasks/knwl_dialo/preprocessing.py \
python ${DIR}/tasks/msdp/preprocessing.py \
--func get_resp_gen_prompts \
--train_file ${WOW_DATA_FOLDER}/train_processed.txt \
--processed_file ${WOW_DATA_FOLDER}/output_response_prompts.txt
......
......@@ -16,14 +16,14 @@ MODEL_GEN_PATH=<PATH_OF_THE_KNOWLEDGE_GENERATION> \
GROUND_TRUTH_PATH=<PATH_OF_THE_GROUND_TRUTH_KNOWLEDGE> \
(e.g., /testseen_knowledge_reference.txt)
python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/main.py \
python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/msdp/main.py \
--num-layers 24 \
--hidden-size 1024 \
--num-attention-heads 16 \
--seq-length 2048 \
--max-position-embeddings 2048 \
--micro-batch-size 4 \
--task KNWL-DIALO-EVAL-F1 \
--task MSDP-EVAL-F1 \
--guess-file ${MODEL_GEN_PATH} \
--answer-file ${GROUND_TRUTH_PATH}
......
......@@ -16,14 +16,14 @@ MODEL_GEN_PATH=<PATH_OF_THE_RESPONSE_GENERATION> \
GROUND_TRUTH_PATH=<PATH_OF_THE_GROUND_TRUTH_RESPONSE> \
(e.g., /testseen_response_reference.txt)
python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/main.py \
python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/msdp/main.py \
--num-layers 24 \
--hidden-size 1024 \
--num-attention-heads 16 \
--seq-length 2048 \
--max-position-embeddings 2048 \
--micro-batch-size 4 \
--task KNWL-DIALO-EVAL-F1 \
--task MSDP-EVAL-F1 \
--guess-file ${MODEL_GEN_PATH} \
--answer-file ${GROUND_TRUTH_PATH}
......@@ -37,14 +37,14 @@ MODEL_GEN_PATH=<PATH_OF_THE_RESPONSE_GENERATION> \
GROUND_TRUTH_PATH=<PATH_OF_THE_GROUND_TRUTH_KNOWLEDGE> \
(e.g., /testseen_knowledge_reference.txt)
python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/main.py \
python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/msdp/main.py \
--num-layers 24 \
--hidden-size 1024 \
--num-attention-heads 16 \
--seq-length 2048 \
--max-position-embeddings 2048 \
--micro-batch-size 4 \
--task KNWL-DIALO-EVAL-F1 \
--task MSDP-EVAL-F1 \
--guess-file ${MODEL_GEN_PATH} \
--answer-file ${GROUND_TRUTH_PATH}
......
......@@ -11,7 +11,7 @@ KNOWLEDGE_FILE=<PATH_OF_GENERATED_KNOWLEDGE_DATA> \
PROCESSED_FILE=<PATH_OF_INPUT_FILE_FOR_RESPONSE_GENERATION> \
(e.g., /testseen_processed_with_generated_knowledge.txt)
python ${DIR}/tasks/knwl_dialo/preprocessing.py \
python ${DIR}/tasks/msdp/preprocessing.py \
--func prepare_input \
--test_file ${TEST_FILE} \
--knowledge_gen_file ${KNOWLEDGE_FILE} \
......
......@@ -22,7 +22,7 @@ PROMPT_PATH=<PATH_OF_KNOWLEDGE_GENERATION_PROMPTS> \
OUTPUT_PATH=<PATH_OF_OUTPUT_GENERATION_FILE> \
(e.g., /testseen_knowledge_generations.txt)
python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/main.py \
python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/msdp/main.py \
--num-layers 24 \
--hidden-size 1024 \
--num-attention-heads 16 \
......@@ -40,7 +40,7 @@ python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/main.py \
--prompt-file ${PROMPT_PATH} \
--prompt-type knowledge \
--num-prompt-examples 10 \
--task KNWL-DIALO-PROMPT
--task MSDP-PROMPT
# NOTE: If you use api for the model generation, please use
# the "--api-prompt" flag (setting this value as True).
......@@ -22,7 +22,7 @@ PROMPT_PATH=<PATH_OF_RESPONSE_GENERATION_PROMPTS> \
OUTPUT_PATH=<PATH_OF_OUTPUT_GENERATION_FILE> \
(e.g., /output_testseen_response_generations.txt)
python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/main.py \
python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/msdp/main.py \
--num-layers 24 \
--hidden-size 1024 \
--num-attention-heads 16 \
......@@ -40,7 +40,7 @@ python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/main.py \
--prompt-file ${PROMPT_PATH} \
--prompt-type response \
--num-prompt-examples 20 \
--task KNWL-DIALO-PROMPT
--task MSDP-PROMPT
# NOTE: If you use api for the model generation, please use
# the "--api-prompt" flag (setting this value as True).
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment