"desktop/src/preload.ts" did not exist on "32c6d8e6cc38b9f51361fa646152f21a7da3ebc2"
data_processing.sh 1.26 KB
Newer Older
zihanl's avatar
zihanl committed
1
2
#!/bin/bash

zihanl's avatar
zihanl committed
3
4
5
6
7
# Data preparation for our framework: preprocessing the Wizard of Wikipedia (WoW) and Wizard of Internet (WoI) datasets
# The datasets can be downloaded through the following links:
# WoW: https://parl.ai/projects/wizard_of_wikipedia/
# WoI: https://parl.ai/projects/sea/

zihanl's avatar
zihanl committed
8
9
10
11
12
13
14
15
16
# Use the current working directory as the base path for every command below.
DIR=$(pwd)
# Create the data directory (and any missing parents). The expansion is quoted
# so that a working directory containing whitespace or glob characters is
# passed to mkdir as a single, literal path.
mkdir -p "${DIR}/tasks/knwl_dialo/data"

# We provide the following script to process the raw data from Wizard of Wikipedia.
# Replace the two <...> placeholders with real paths before running. They are
# quoted on purpose: unquoted, the shell parses `<` and `>` as redirections and
# the trailing `>` makes the whole script abort with a syntax error.
python "${DIR}/tasks/knwl_dialo/preprocessing.py" --func process_wow_dataset \
  --input_file "<PATH_OF_THE_INPUT_DATA>" \
  --output_file "<PATH_OF_THE_OUTPUT_DATA>"

# We provide the following script to process the raw data from Wizard of Internet.
# Replace the two <...> placeholders with real paths before running. They are
# quoted on purpose: unquoted, the shell parses `<` and `>` as redirections and
# the trailing `>` makes the whole script abort with a syntax error.
python "${DIR}/tasks/knwl_dialo/preprocessing.py" --func process_woi_dataset \
  --input_file "<PATH_OF_THE_INPUT_DATA>" \
  --output_file "<PATH_OF_THE_OUTPUT_DATA>"

zihanl's avatar
zihanl committed
17
18
19
# Obtain the knowledge generation prompts and response generation prompts.
# Replace the four <...> placeholders with real paths before running. They are
# quoted on purpose: unquoted, the shell parses `<` and `>` as redirections and
# the trailing `>` makes the whole script abort with a syntax error.
python "${DIR}/tasks/knwl_dialo/preprocessing.py" --func get_prompts \
  --test_file "<PATH_OF_THE_PROCESSED_TEST_DATA>" \
  --train_file "<PATH_OF_THE_PROCESSED_TRAIN_DATA>" \
  --model_file "<PATH_OF_THE_DPR_MODEL>" \
  --output_file "<PATH_OF_THE_OUTPUT_FILE>"

zihanl's avatar
zihanl committed
20
# Alternatively, we recommend downloading the already-processed file directly:
zihanl's avatar
zihanl committed
21
# Download the already-processed data from docs.google.com and save it as
# data.gz in the current directory. TLS certificate verification is left at
# wget's default (enabled): the former --no-check-certificate flag accepted any
# certificate, which lets a man-in-the-middle substitute the downloaded file.
# NOTE(review): if this fails on a host with an outdated CA bundle, fix the CA
# store rather than disabling verification.
wget 'https://docs.google.com/uc?export=download&id=1vP0eGxhkbWfeJ2dUUOEAflbOZq-Jlde_' -O data.gz
zihanl's avatar
zihanl committed
22