data_processing.sh 1.48 KB
Newer Older
zihanl's avatar
zihanl committed
1
2
#!/bin/bash

zihanl's avatar
zihanl committed
3
4
5
6
7
# Data preparation for our framework: preprocessing the Wizard of Wikipedia (WoW) and Wizard of Internet (WoI) datasets
# The datasets can be downloaded through the following links:
# WoW: https://parl.ai/projects/wizard_of_wikipedia/
# WoI: https://parl.ai/projects/sea/

zihanl's avatar
zihanl committed
8
9
10
# Directory the script is launched from; the preprocessing commands in this
# file locate tasks/knwl_dialo/preprocessing.py relative to it, so run the
# script from the directory that contains "tasks/".
DIR=$(pwd)

# We provide the following script to process the raw data from Wizard of Wikipedia
zihanl's avatar
zihanl committed
11
# Replace every "<...>" placeholder with a real path before running, keeping
# the surrounding double quotes so paths containing spaces stay one argument.
# The placeholders must stay quoted: unquoted, the shell treats "<" and ">" as
# redirection operators instead of passing the text to the script.
python "${DIR}/tasks/knwl_dialo/preprocessing.py" \
  --func process_wow_dataset \
  --raw_file "<PATH_OF_THE_INPUT_DATA>" \
  --processed_file "<PATH_OF_THE_OUTPUT_DATA>" \
  --knwl_ref_file "<PATH_OF_THE_KNOWLEDGE_REFERENCE_OUTPUT_DATA>" \
  --resp_ref_file "<PATH_OF_THE_RESPONSE_REFERENCE_OUTPUT_DATA>"
zihanl's avatar
zihanl committed
12
13

# We provide the following script to process the raw data from Wizard of Internet
zihanl's avatar
zihanl committed
14
# Replace every "<...>" placeholder with a real path before running, keeping
# the surrounding double quotes so paths containing spaces stay one argument.
# The placeholders must stay quoted: unquoted, the shell treats "<" and ">" as
# redirection operators instead of passing the text to the script.
python "${DIR}/tasks/knwl_dialo/preprocessing.py" \
  --func process_woi_dataset \
  --raw_file "<PATH_OF_THE_INPUT_DATA>" \
  --processed_file "<PATH_OF_THE_OUTPUT_DATA>" \
  --knwl_ref_file "<PATH_OF_THE_KNOWLEDGE_REFERENCE_OUTPUT_DATA>" \
  --resp_ref_file "<PATH_OF_THE_RESPONSE_REFERENCE_OUTPUT_DATA>"
zihanl's avatar
zihanl committed
15

root's avatar
root committed
16
# Obtain the knowledge generation prompts
zihanl's avatar
zihanl committed
17
# Replace every "<...>" placeholder with a real value before running, keeping
# the surrounding double quotes so values containing spaces stay one argument.
# The placeholders must stay quoted: unquoted, the shell treats "<" and ">" as
# redirection operators instead of passing the text to the script.
python "${DIR}/tasks/knwl_dialo/preprocessing.py" \
  --func get_knwl_gen_prompts \
  --test_file "<PATH_OF_THE_PROCESSED_TEST_DATA>" \
  --train_file "<PATH_OF_THE_PROCESSED_TRAIN_DATA>" \
  --model_file "<PATH_OF_THE_DPR_MODEL>" \
  --processed_file "<PATH_OF_THE_OUTPUT_FILE>" \
  --data_type "<DATA_TYPE_OF_THE_INPUT_FILE>"
zihanl's avatar
zihanl committed
18

root's avatar
root committed
19
# Obtain the response generation prompts
zihanl's avatar
zihanl committed
20
# Replace every "<...>" placeholder with a real path before running, keeping
# the surrounding double quotes so paths containing spaces stay one argument.
# The placeholders must stay quoted: unquoted, the shell treats "<" and ">" as
# redirection operators instead of passing the text to the script.
python "${DIR}/tasks/knwl_dialo/preprocessing.py" \
  --func get_resp_gen_prompts \
  --train_file "<PATH_OF_THE_PROCESSED_TRAIN_DATA>" \
  --processed_file "<PATH_OF_THE_OUTPUT_FILE>"
zihanl's avatar
zihanl committed
21