Commit 6a526b83 authored by Dan Anghel, committed by GitHub

Scripts to download GLDv2 data for DELF training and to install DELF package (#8616)

* First version of working script to download the GLDv2 dataset

* First version of the DELF package installation script

* First working version of the DELF package installation script

* Fixed feedback from PR review
#!/bin/bash
# Copyright 2020 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# This script downloads the Google Landmarks v2 dataset. To download the
# dataset, run the script as in the following example:
# bash download_dataset.sh 500 100 20
#
# The script takes the following parameters, in order:
# - number of image files from the TRAIN split to download (maximum 500)
# - number of image files from the INDEX split to download (maximum 100)
# - number of image files from the TEST split to download (maximum 20)
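#
# For a quick sanity check before a full download, smaller values can be used,
# e.g.:
# bash download_dataset.sh 3 1 1
# TAR file indices are zero-based, so the example above fetches
# images_000.tar through images_002.tar from TRAIN and images_000.tar from
# INDEX and TEST.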
image_files_train=$1 # Number of image files to download from the TRAIN split
image_files_index=$2 # Number of image files to download from the INDEX split
image_files_test=$3 # Number of image files to download from the TEST split
splits=("train" "test" "index")
dataset_root_folder=gldv2_dataset
metadata_url="https://s3.amazonaws.com/google-landmark/metadata"
ground_truth_url="https://s3.amazonaws.com/google-landmark/ground_truth"
csv_train=(${metadata_url}/train.csv ${metadata_url}/train_clean.csv ${metadata_url}/train_attribution.csv ${metadata_url}/train_label_to_category.csv)
csv_index=(${metadata_url}/index.csv ${metadata_url}/index_image_to_landmark.csv ${metadata_url}/index_label_to_category.csv)
csv_test=(${metadata_url}/test.csv ${ground_truth_url}/recognition_solution_v2.1.csv ${ground_truth_url}/retrieval_solution_v2.1.csv)
images_tar_file_base_url="https://s3.amazonaws.com/google-landmark"
images_md5_file_base_url="https://s3.amazonaws.com/google-landmark/md5sum"
num_processes=6
make_folder() {
# Creates a folder if it does not already exist. Exits if folder creation fails.
local folder=$1
if [ -d "${folder}" ]; then
echo "Folder ${folder} already exists. Skipping folder creation."
else
echo "Creating folder ${folder}."
if mkdir ${folder}; then
echo "Successfully created folder ${folder}."
else
echo "Failed to create folder ${folder}. Exiting."
exit 1
fi
fi
}
download_file() {
# Downloads a file from a URL into a specified folder.
local file_url=$1
local folder=$2
local file_path="${folder}/`basename ${file_url}`"
echo "Downloading file ${file_url} to folder ${folder}."
pushd . > /dev/null
cd ${folder}
curl -Os ${file_url}
popd > /dev/null
}
validate_md5_checksum() {
# Validates the MD5 checksum of a downloaded file.
local content_file=$1
local md5_file=$2
echo "Checking MD5 checksum of file ${content_file} against ${md5_file}"
if [[ "${OSTYPE}" == "linux-gnu" ]]; then
content_md5=`md5sum ${content_file}`
elif [[ "${OSTYPE}" == "darwin"* ]]; then
content_md5=`md5 -r "${content_file}"`
fi
content_md5=`cut -d' ' -f1 <<<"${content_md5}"`
expected_md5=`cut -d' ' -f1 "${md5_file}"`
if [[ "$content_md5" != "" && "$content_md5" = "$expected_md5" ]]; then
echo "Check passed."
else
echo "Check failed. MD5 checksums don't match. Exiting."
exit 1
fi
}
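# The md5 files published alongside the image TAR files are assumed to be in
# md5sum output format, e.g. "<32-hex-digit hash>  images_000.tar"; only the
# first space-separated field (the hash) is compared above.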
extract_tar_file() {
# Extracts the content of a tar file to a specified folder.
local tar_file=$1
local folder=$2
echo "Extracting file ${tar_file} to folder ${folder}"
tar -C ${folder} -xf ${tar_file}
}
download_image_file() {
# Downloads one image TAR file of a split and untars it.
local split=$1
local idx=`printf "%03g" $2`
local split_folder=$3
local images_tar_file=images_${idx}.tar
local images_tar_file_url=${images_tar_file_base_url}/${split}/${images_tar_file}
local images_tar_file_path=${split_folder}/${images_tar_file}
local images_md5_file=md5.images_${idx}.txt
local images_md5_file_url=${images_md5_file_base_url}/${split}/${images_md5_file}
local images_md5_file_path=${split_folder}/${images_md5_file}
download_file "${images_tar_file_url}" "${split_folder}"
download_file "${images_md5_file_url}" "${split_folder}"
validate_md5_checksum "${images_tar_file_path}" "${images_md5_file_path}"
extract_tar_file "${images_tar_file_path}" "${split_folder}"
}
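# Example: for the TRAIN split and index 0, download_image_file fetches
#   https://s3.amazonaws.com/google-landmark/train/images_000.tar
#   https://s3.amazonaws.com/google-landmark/md5sum/train/md5.images_000.txt
# then verifies the checksum and extracts the images into the split folder.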
download_image_files() {
# Downloads all image files of a split and untars them.
local split=$1
local split_folder=$2
local image_files="image_files_${split}"
local max_idx=$(expr ${!image_files} - 1)
echo "Downloading ${!image_files} files form the split ${split} in the folder ${split_folder}."
for i in $(seq 0 ${num_processes} ${max_idx}); do
local curr_max_idx=$(expr ${i} + ${num_processes} - 1)
local last_idx=$((${curr_max_idx}>${max_idx}?${max_idx}:${curr_max_idx}))
for j in $(seq ${i} 1 ${last_idx}); do download_image_file "${split}" "${j}" "${split_folder}" & done
wait
done
}
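# Example: with image_files_train=20 and num_processes=6, the TRAIN TAR files
# are fetched in parallel batches of indices 0-5, 6-11, 12-17 and 18-19,
# waiting for each batch to complete before starting the next one.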
download_csv_files() {
# Downloads all metadata CSV files of a split.
local split=$1
local split_folder=$2
local csv_list="csv_${split}[*]"
for csv_file in ${!csv_list}; do
download_file "${csv_file}" "${split_folder}"
done
}
download_split() {
# Downloads all artifacts (metadata CSV files and image files) of a single split.
local split=$1
local split_folder=${dataset_root_folder}/${split}
make_folder "${split_folder}"
download_csv_files "${split}" "${split_folder}"
download_image_files "${split}" "${split_folder}"
}
download_all_splits() {
# Downloads all artifacts (metadata CSV files and image files) of all splits.
make_folder "${dataset_root_folder}"
for split in "${splits[@]}"; do
download_split "$split"
done
}
download_all_splits
exit 0
#!/bin/bash
# Copyright 2020 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# This script installs the DELF package along with its dependencies. To install
# the DELF package, run the script as in the following example:
# bash install_delf.sh
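#
# The relative paths used below assume the script is executed from the folder
# that contains it (presumably models/research/delf/delf/python/training in the
# TensorFlow Models repository); running it from elsewhere will likely break
# the proto compilation and package installation steps.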
protoc_folder="protoc"
protoc_url="https://github.com/google/protobuf/releases/download/v3.3.0/protoc-3.3.0-linux-x86_64.zip"
tf_slim_git_repo="https://github.com/google-research/tf-slim.git"
handle_exit_code() {
# Fails gracefully if the exit code is different from 0.
exit_code=$1
error_message=$2
if [ ${exit_code} -ne 0 ]; then
echo "${error_message} Exiting."
exit 1
fi
}
install_tensorflow() {
# Installs TensorFlow 2.2.
echo "Installing TensorFlow 2.2"
pip3 install --upgrade tensorflow==2.2.0
local exit_code=$?
handle_exit_code ${exit_code} "Unable to install Tensorflow 2.2."
echo "Installing TensorFlow 2.2 for GPU"
pip3 install --upgrade tensorflow-gpu==2.2.0
local exit_code=$?
handle_exit_code ${exit_code} "Unable to install Tensorflow for GPU 2.2.0."
}
install_tf_slim() {
# Installs TF-Slim from source.
echo "Installing TF-Slim from source: ${tf_slim_git_repo}"
git clone ${tf_slim_git_repo}
local exit_code=$?
handle_exit_code ${exit_code} "Unable to clone TF-Slim repository ${tf_slim_git_repo}."
pushd . > /dev/null
cd tf-slim
pip3 install .
local exit_code=$?
handle_exit_code ${exit_code} "Unable to install the TF-Slim package."
popd > /dev/null
rm -rf tf-slim
}
download_protoc() {
# Downloads and unpacks the Protobuf compiler (protoc).
echo "Downloading Protobuf compiler from ${protoc_url}"
curl -L -Os ${protoc_url}
local exit_code=$?
handle_exit_code ${exit_code} "Unable to download Protobuf compiler from ${tf_slim_git_repo}."
mkdir ${protoc_folder}
local protoc_archive=`basename ${protoc_url}`
unzip ${protoc_archive} -d ${protoc_folder}
local exit_code=$?
handle_exit_code ${exit_code} "Unable to unzip Protobuf compiler from ${protoc_archive}."
rm ${protoc_archive}
}
compile_delf_protos() {
# Compiles the DELF protobufs in tensorflow/models/research/delf using the protoc compiler.
echo "Compiling DELF Protobufs"
PATH_TO_PROTOC="`pwd`/${protoc_folder}"
pushd . > /dev/null
cd ../../..
${PATH_TO_PROTOC}/bin/protoc delf/protos/*.proto --python_out=.
local exit_code=$?
handle_exit_code ${exit_code} "Unable to compile DELF Protobufs."
popd > /dev/null
}
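# If the compilation succeeds, protoc should write one *_pb2.py module next to
# each .proto file under delf/protos/, which the DELF Python code imports at
# runtime.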
cleanup_protoc() {
# Removes the downloaded Protobuf compiler protoc after the installation of the DELF package.
echo "Cleaning up Protobuf compiler download"
rm -rf ${protoc_folder}
}
install_python_libraries() {
# Installs the Python libraries that the DELF package depends on.
echo "Installing matplotlib, numpy, scikit-image, scipy and python3-tk"
pip3 install matplotlib numpy scikit-image scipy
local exit_code=$?
handle_exit_code ${exit_code} "Unable to install at least one of: matplotlib numpy scikit-image scipy."
sudo apt-get -y install python3-tk
local exit_code=$?
handle_exit_code ${exit_code} "Unable to install python3-tk."
}
install_object_detection() {
# Installs the object detection package from tensorflow/models/research.
echo "Installing object detection"
pushd . > /dev/null
cd ../../../..
export PYTHONPATH=$PYTHONPATH:`pwd`
pip3 install .
local exit_code=$?
handle_exit_code ${exit_code} "Unable to install the object_detection package."
popd > /dev/null
}
install_delf_package() {
# Installs the DELF package from tensorflow/models/research/delf/delf.
echo "Installing DELF package"
pushd . > /dev/null
cd ../../..
pip3 install -e .
local exit_code=$?
handle_exit_code ${exit_code} "Unable to install the DELF package."
popd > /dev/null
}
post_install_check() {
# Checks that the DELF package has been successfully installed.
echo "Checking DELF package installation"
python3 -c 'import delf'
local exit_code=$?
handle_exit_code ${exit_code} "DELF package installation check failed."
echo "Installation successful."
}
install_delf() {
# Orchestrates DELF package installation.
install_tensorflow
install_tf_slim
download_protoc
compile_delf_protos
cleanup_protoc
install_python_libraries
install_object_detection
install_delf_package
post_install_check
}
install_delf
exit 0