change directory name to msdp

b8707ee2 · zihanl · 4e48efdf · b8707ee2 · b8707ee2 · b8707ee2
Commit b8707ee2 authored Dec 13, 2021 by zihanl
7 changed files
--- a/examples/knwl_dialo/README.md
+++ b/examples/knwl_dialo/README.md

 # Multi-Stage Prompting for Knowledgeable Dialogue Generation

-This directory contains all the scripts of multi-stage prompting for knowledgeable dialogue generation that includes data preparation, and knowledge and response generations. More details are available on [`knowledgeable task directory`](../../tasks/knwl_dialo).
+This directory contains all the scripts for multi-stage prompting for knowledgeable dialogue generation, including data preparation, knowledge generation, and response generation. More details are available in the [`knowledgeable task directory`](../../tasks/msdp).

--- a/examples/knwl_dialo/data_processing.sh
+++ b/examples/knwl_dialo/data_processing.sh
@@ -13,13 +13,13 @@ WOI_DATA_FOLDER=<PATH_OF_WIZARD_OF_INTERNET_DATA_FOLDER>

 # We provide examples for processing the raw data from Wizard of Wikipedia
 # Processing the train dataset (train.json)
-python ${DIR}/tasks/knwl_dialo/preprocessing.py \
+python ${DIR}/tasks/msdp/preprocessing.py \
        --func process_wow_dataset \
        --raw_file ${WOW_DATA_FOLDER}/train.json \
        --processed_file ${WOW_DATA_FOLDER}/train_processed.txt

 # Processing the test seen dataset (test_random_split.json)
-python ${DIR}/tasks/knwl_dialo/preprocessing.py \
+python ${DIR}/tasks/msdp/preprocessing.py \
        --func process_wow_dataset \
        --raw_file ${WOW_DATA_FOLDER}/test_random_split.json \
        --processed_file ${WOW_DATA_FOLDER}/testseen_processed.txt \
@@ -27,7 +27,7 @@ python ${DIR}/tasks/knwl_dialo/preprocessing.py \
        --resp_ref_file ${WOW_DATA_FOLDER}/output_testseen_response_reference.txt

 # Processing the test unseen dataset (test_topic_split.json)
-python ${DIR}/tasks/knwl_dialo/preprocessing.py \
+python ${DIR}/tasks/msdp/preprocessing.py \
        --func process_wow_dataset \
        --raw_file ${WOW_DATA_FOLDER}/test_topic_split.json \
        --processed_file ${WOW_DATA_FOLDER}/testunseen_processed.txt \
@@ -37,7 +37,7 @@ python ${DIR}/tasks/knwl_dialo/preprocessing.py \

 # We provide the following script to process the raw data from Wizard of Internet
 # Processing the test dataset (test.jsonl)
-python ${DIR}/tasks/knwl_dialo/preprocessing.py \
+python ${DIR}/tasks/msdp/preprocessing.py \
        --func process_woi_dataset \
        --raw_file ${WOI_DATA_FOLDER}/test.jsonl \
        --processed_file ${WOI_DATA_FOLDER}/test_processed.txt \
@@ -48,7 +48,7 @@ python ${DIR}/tasks/knwl_dialo/preprocessing.py \
 # Get the knowledge generation prompts for each test dataset in WoW and WoI
 MODEL_FILE=<PATH_OF_THE_FINETUNED_DPR_MODEL> 
 # WoW test seen
-python ${DIR}/tasks/knwl_dialo/preprocessing.py \
+python ${DIR}/tasks/msdp/preprocessing.py \
        --func get_knwl_gen_prompts \
        --test_file ${WOW_DATA_FOLDER}/testseen_processed.txt \
        --train_file ${WOW_DATA_FOLDER}/train_processed.txt \
@@ -57,7 +57,7 @@ python ${DIR}/tasks/knwl_dialo/preprocessing.py \
        --data_type wow_seen

 # WoW test unseen
-python ${DIR}/tasks/knwl_dialo/preprocessing.py \
+python ${DIR}/tasks/msdp/preprocessing.py \
        --func get_knwl_gen_prompts \
        --test_file ${WOW_DATA_FOLDER}/testunseen_processed.txt \
        --train_file ${WOW_DATA_FOLDER}/train_processed.txt \
@@ -66,7 +66,7 @@ python ${DIR}/tasks/knwl_dialo/preprocessing.py \
        --data_type wow_unseen

 # WoI
-python ${DIR}/tasks/knwl_dialo/preprocessing.py \
+python ${DIR}/tasks/msdp/preprocessing.py \
        --func get_knwl_gen_prompts \
        --test_file ${WOI_DATA_FOLDER}/test_processed.txt \
        --train_file ${WOW_DATA_FOLDER}/train_processed.txt \
@@ -76,7 +76,7 @@ python ${DIR}/tasks/knwl_dialo/preprocessing.py \


 # Get the response generation prompts (can be applied for all the test datasets)
-python ${DIR}/tasks/knwl_dialo/preprocessing.py \
+python ${DIR}/tasks/msdp/preprocessing.py \
        --func get_resp_gen_prompts \
        --train_file ${WOW_DATA_FOLDER}/train_processed.txt \
        --processed_file ${WOW_DATA_FOLDER}/output_response_prompts.txt

--- a/examples/knwl_dialo/eval_knwl_generation.sh
+++ b/examples/knwl_dialo/eval_knwl_generation.sh
@@ -16,14 +16,14 @@ MODEL_GEN_PATH=<PATH_OF_THE_KNOWLEDGE_GENERATION> \
 GROUND_TRUTH_PATH=<PATH_OF_THE_GROUND_TRUTH_KNOWLEDGE> \ 
        (e.g., /testseen_knowledge_reference.txt)

-python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/main.py \
+python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/msdp/main.py \
        --num-layers 24 \
        --hidden-size 1024 \
        --num-attention-heads 16 \
        --seq-length 2048 \
        --max-position-embeddings 2048 \
        --micro-batch-size 4 \
-        --task KNWL-DIALO-EVAL-F1 \
+        --task MSDP-EVAL-F1 \
        --guess-file ${MODEL_GEN_PATH} \
        --answer-file ${GROUND_TRUTH_PATH}


--- a/examples/knwl_dialo/eval_resp_generation.sh
+++ b/examples/knwl_dialo/eval_resp_generation.sh
@@ -16,14 +16,14 @@ MODEL_GEN_PATH=<PATH_OF_THE_RESPONSE_GENERATION> \
 GROUND_TRUTH_PATH=<PATH_OF_THE_GROUND_TRUTH_RESPONSE> \ 
        (e.g., /testseen_response_reference.txt)

-python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/main.py \
+python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/msdp/main.py \
        --num-layers 24 \
        --hidden-size 1024 \
        --num-attention-heads 16 \
        --seq-length 2048 \
        --max-position-embeddings 2048 \
        --micro-batch-size 4 \
-        --task KNWL-DIALO-EVAL-F1 \
+        --task MSDP-EVAL-F1 \
        --guess-file ${MODEL_GEN_PATH} \
        --answer-file ${GROUND_TRUTH_PATH}

@@ -37,14 +37,14 @@ MODEL_GEN_PATH=<PATH_OF_THE_RESPONSE_GENERATION> \
 GROUND_TRUTH_PATH=<PATH_OF_THE_GROUND_TRUTH_KNOWLEDGE> \ 
        (e.g., /testseen_knowledge_reference.txt)

-python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/main.py \
+python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/msdp/main.py \
        --num-layers 24 \
        --hidden-size 1024 \
        --num-attention-heads 16 \
        --seq-length 2048 \
        --max-position-embeddings 2048 \
        --micro-batch-size 4 \
-        --task KNWL-DIALO-EVAL-F1 \
+        --task MSDP-EVAL-F1 \
        --guess-file ${MODEL_GEN_PATH} \
        --answer-file ${GROUND_TRUTH_PATH}


--- a/examples/knwl_dialo/prep_resp_gen.sh
+++ b/examples/knwl_dialo/prep_resp_gen.sh
@@ -11,7 +11,7 @@ KNOWLEDGE_FILE=<PATH_OF_GENERATED_KNOWLEDGE_DATA> \
 PROCESSED_FILE=<PATH_OF_INPUT_FILE_FOR_RESPONSE_GENERATION> \
        (e.g., /testseen_processed_with_generated_knowledge.txt)

-python ${DIR}/tasks/knwl_dialo/preprocessing.py \
+python ${DIR}/tasks/msdp/preprocessing.py \
        --func prepare_input \
        --test_file ${TEST_FILE} \
        --knowledge_gen_file ${KNOWLEDGE_FILE} \

--- a/examples/knwl_dialo/prompt_knwl_gen.sh
+++ b/examples/knwl_dialo/prompt_knwl_gen.sh
@@ -22,7 +22,7 @@ PROMPT_PATH=<PATH_OF_KNOWLEDGE_GENERATION_PROMPTS> \
 OUTPUT_PATH=<PATH_OF_OUTPUT_GENERATION_FILE> \
        (e.g., /testseen_knowledge_generations.txt)

-python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/main.py \
+python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/msdp/main.py \
        --num-layers 24 \
        --hidden-size 1024 \
        --num-attention-heads 16 \
@@ -40,7 +40,7 @@ python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/main.py \
        --prompt-file ${PROMPT_PATH} \
        --prompt-type knowledge \
        --num-prompt-examples 10 \
-        --task KNWL-DIALO-PROMPT 
+        --task MSDP-PROMPT 

 # NOTE: If you use an API for the model generation, please use
 # the "--api-prompt" flag (setting this value to True).
--- a/examples/knwl_dialo/prompt_resp_gen.sh
+++ b/examples/knwl_dialo/prompt_resp_gen.sh
@@ -22,7 +22,7 @@ PROMPT_PATH=<PATH_OF_RESPONSE_GENERATION_PROMPTS> \
 OUTPUT_PATH=<PATH_OF_OUTPUT_GENERATION_FILE> \
        (e.g., /output_testseen_response_generations.txt)

-python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/main.py \
+python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/msdp/main.py \
        --num-layers 24 \
        --hidden-size 1024 \
        --num-attention-heads 16 \
@@ -40,7 +40,7 @@ python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/main.py \
        --prompt-file ${PROMPT_PATH} \
        --prompt-type response \
        --num-prompt-examples 20 \
-        --task KNWL-DIALO-PROMPT 
+        --task MSDP-PROMPT 

 # NOTE: If you use an API for the model generation, please use
 # the "--api-prompt" flag (setting this value to True).