update format

ff15473d · zihanl · 01a0914d · ff15473d · ff15473d
Commit ff15473d authored Dec 06, 2021 by zihanl
Showing with 43 additions and 8 deletions

tasks/knwl_dialo/scripts/data_processing.sh tasks/knwl_dialo/scripts/data_processing.sh +38 -7

tasks/knwl_dialo/scripts/prep_resp_gen.sh tasks/knwl_dialo/scripts/prep_resp_gen.sh +5 -1

No files found.
--- a/tasks/knwl_dialo/scripts/data_processing.sh
+++ b/tasks/knwl_dialo/scripts/data_processing.sh
@@ -9,19 +9,50 @@ DIR=`pwd`
 mkdir ${DIR}/tasks/knwl_dialo/data
 mkdir ${DIR}/tasks/knwl_dialo/data/wizard_of_wikipedia
 mkdir ${DIR}/tasks/knwl_dialo/data/wizard_of_internet
-# Before running the preprocessing, please download the datasets and put them into the corresponding created data folder.
+# Before running the preprocessing, please download the datasets, 
+# and put them into the corresponding created data folder.
 # We provide examples for processing the raw data from Wizard of Wikipedia
-python ${DIR}/tasks/knwl_dialo/preprocessing.py --func process_wow_dataset --raw_file ${DIR}/tasks/knwl_dialo/data/wizard_of_wikipedia/train.json --processed_file <PATH_OF_THE_PROCESSED_WOW_TRAIN_DATA>
+python ${DIR}/tasks/knwl_dialo/preprocessing.py \
-python ${DIR}/tasks/knwl_dialo/preprocessing.py --func process_wow_dataset --raw_file ${DIR}/tasks/knwl_dialo/data/wizard_of_wikipedia/test_random_split.json --processed_file <PATH_OF_THE_PROCESSED_TEST_SEEN_DATA> --knwl_ref_file <PATH_OF_THE_TEST_SEEN_KNOWLEDGE_REFERENCE_OUTPUT_DATA> --resp_ref_file <PATH_OF_THE_TEST_SEEN_RESPONSE_REFERENCE_OUTPUT_DATA>
+        --func process_wow_dataset \
-python ${DIR}/tasks/knwl_dialo/preprocessing.py --func process_wow_dataset --raw_file ${DIR}/tasks/knwl_dialo/data/wizard_of_wikipedia/test_topic_split.json --processed_file <PATH_OF_THE_PROCESSED_TEST_UNSEEN_DATA> --knwl_ref_file <PATH_OF_THE_TEST_UNSEEN_KNOWLEDGE_REFERENCE_OUTPUT_DATA> --resp_ref_file <PATH_OF_THE_TEST_UNSEEN_RESPONSE_REFERENCE_OUTPUT_DATA>
+        --raw_file ${DIR}/tasks/knwl_dialo/data/wizard_of_wikipedia/train.json \
+        --processed_file <PATH_OF_THE_PROCESSED_WOW_TRAIN_DATA>
+python ${DIR}/tasks/knwl_dialo/preprocessing.py \
+        --func process_wow_dataset \
+        --raw_file ${DIR}/tasks/knwl_dialo/data/wizard_of_wikipedia/test_random_split.json \
+        --processed_file <PATH_OF_THE_PROCESSED_TEST_SEEN_DATA> \
+        --knwl_ref_file <PATH_OF_THE_TEST_SEEN_KNOWLEDGE_REFERENCE_OUTPUT_DATA> \
+        --resp_ref_file <PATH_OF_THE_TEST_SEEN_RESPONSE_REFERENCE_OUTPUT_DATA>
+python ${DIR}/tasks/knwl_dialo/preprocessing.py \
+        --func process_wow_dataset \
+        --raw_file ${DIR}/tasks/knwl_dialo/data/wizard_of_wikipedia/test_topic_split.json \
+        --processed_file <PATH_OF_THE_PROCESSED_TEST_UNSEEN_DATA> \
+        --knwl_ref_file <PATH_OF_THE_TEST_UNSEEN_KNOWLEDGE_REFERENCE_OUTPUT_DATA> \
+        --resp_ref_file <PATH_OF_THE_TEST_UNSEEN_RESPONSE_REFERENCE_OUTPUT_DATA>
 # We provide the following script to process the raw data from Wizard of Internet
-python ${DIR}/tasks/knwl_dialo/preprocessing.py --func process_woi_dataset --raw_file ${DIR}/tasks/knwl_dialo/data/wizard_of_internet/test.jsonl --processed_file <PATH_OF_THE_PROCESSED_TEST_DATA> --knwl_ref_file <PATH_OF_THE_TEST_KNOWLEDGE_REFERENCE_OUTPUT_DATA> --resp_ref_file <PATH_OF_THE_TEST_RESPONSE_REFERENCE_OUTPUT_DATA>
+python ${DIR}/tasks/knwl_dialo/preprocessing.py \
+        --func process_woi_dataset \
+        --raw_file ${DIR}/tasks/knwl_dialo/data/wizard_of_internet/test.jsonl \
+        --processed_file <PATH_OF_THE_PROCESSED_TEST_DATA> \
+        --knwl_ref_file <PATH_OF_THE_TEST_KNOWLEDGE_REFERENCE_OUTPUT_DATA> \
+        --resp_ref_file <PATH_OF_THE_TEST_RESPONSE_REFERENCE_OUTPUT_DATA>
 # Obtain the knowledge generation prompts for each test dataset (Wizard of Wikipedia test seen/unseen and Wizard of Internet test)
-python ${DIR}/tasks/knwl_dialo/preprocessing.py --func get_knwl_gen_prompts --test_file <PATH_OF_THE_PROCESSED_TEST_DATA> --train_file <PATH_OF_THE_PROCESSED_WOW_TRAIN_DATA> --model_file <PATH_OF_THE_DPR_MODEL> --processed_file <PATH_OF_THE_OUTPUT_PROMPT_FILE> --data_type <DATA_TYPE_OF_THE_INPUT_FILE>
+python ${DIR}/tasks/knwl_dialo/preprocessing.py \
+        --func get_knwl_gen_prompts \
+        --test_file <PATH_OF_THE_PROCESSED_TEST_DATA> \
+        --train_file <PATH_OF_THE_PROCESSED_WOW_TRAIN_DATA> \
+        --model_file <PATH_OF_THE_DPR_MODEL> \
+        --processed_file <PATH_OF_THE_OUTPUT_PROMPT_FILE> \
+        --data_type <DATA_TYPE_OF_THE_INPUT_FILE>
 # Obtain the response generation prompts
-python ${DIR}/tasks/knwl_dialo/preprocessing.py --func get_resp_gen_prompts --train_file <PATH_OF_THE_PROCESSED_WOW_TRAIN_DATA> --processed_file <PATH_OF_THE_OUTPUT_PROMPT_FILE>
+python ${DIR}/tasks/knwl_dialo/preprocessing.py \
+        --func get_resp_gen_prompts \
+        --train_file <PATH_OF_THE_PROCESSED_WOW_TRAIN_DATA> \
+        --processed_file <PATH_OF_THE_OUTPUT_PROMPT_FILE>
--- a/tasks/knwl_dialo/scripts/prep_resp_gen.sh
+++ b/tasks/knwl_dialo/scripts/prep_resp_gen.sh
@@ -3,4 +3,8 @@
 # Preparing the input file for the response generation (second-stage prompting)
 DIR=`pwd`
-python ${DIR}/tasks/knwl_dialo/preprocessing.py --func prepare_input --test_file <PATH_OF_THE_PROCESSED_TEST_DATA> --knowledge_gen_file <PATH_OF_THE_GENERATED_KNOWLEDGE_DATA> --processed_file <PATH_OF_THE_INPUT_FILE_FOR_RESPONSE_GENERATION>
+python ${DIR}/tasks/knwl_dialo/preprocessing.py \
+        --func prepare_input \
+        --test_file <PATH_OF_THE_PROCESSED_TEST_DATA> \
+        --knowledge_gen_file <PATH_OF_THE_GENERATED_KNOWLEDGE_DATA> \
+        --processed_file <PATH_OF_THE_INPUT_FILE_FOR_RESPONSE_GENERATION>