"examples/vscode:/vscode.git/clone" did not exist on "95eada24fcff616aebdffe9aa0b174eee2264fc5"
Unverified Commit 6a526b83 authored by Dan Anghel's avatar Dan Anghel Committed by GitHub
Browse files

Scripts to download GLDv2 data for DELF training and to install DELF package (#8616)

* First version of working script to download the GLDv2 dataset

* First version of the DELF package installation script

* First working version of the DELF package installation script

* Fixed feedback from PR review
parent 1705d0df
#!/bin/bash
# Copyright 2020 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# This script downloads the Google Landmarks v2 dataset. To download the dataset
# run the script like in the following example:
# bash download_dataset.sh 500 100 20
#
# The script takes the following parameters, in order:
# - number of image files from the TRAIN split to download (maximum 500)
# - number of image files from the INDEX split to download (maximum 100)
# - number of image files from the TEST split to download (maximum 20)
# Command-line parameters: number of image tar files to fetch per split.
image_files_train=$1  # TRAIN split (maximum 500)
image_files_index=$2  # INDEX split (maximum 100)
image_files_test=$3   # TEST split (maximum 20)

splits=("train" "test" "index")
dataset_root_folder=gldv2_dataset

# Base URLs for the metadata and ground-truth CSV files.
metadata_url="https://s3.amazonaws.com/google-landmark/metadata"
ground_truth_url="https://s3.amazonaws.com/google-landmark/ground_truth"

# Per-split lists of CSV files to download.
csv_train=(
  "${metadata_url}/train.csv"
  "${metadata_url}/train_clean.csv"
  "${metadata_url}/train_attribution.csv"
  "${metadata_url}/train_label_to_category.csv"
)
csv_index=(
  "${metadata_url}/index.csv"
  "${metadata_url}/index_image_to_landmark.csv"
  "${metadata_url}/index_label_to_category.csv"
)
csv_test=(
  "${metadata_url}/test.csv"
  "${ground_truth_url}/recognition_solution_v2.1.csv"
  "${ground_truth_url}/retrieval_solution_v2.1.csv"
)

# Base URLs for the image tar files and their MD5 checksum files.
images_tar_file_base_url="https://s3.amazonaws.com/google-landmark"
images_md5_file_base_url="https://s3.amazonaws.com/google-landmark/md5sum"

# Number of concurrent downloads per batch.
num_processes=6
make_folder() {
  # Creates a folder if it does not already exist; exits the script with
  # status 1 when the creation fails.
  # Arguments:
  #   $1 - path of the folder to create
  local folder=$1
  if [ -d "${folder}" ]; then
    echo "Folder ${folder} already exists. Skipping folder creation."
  else
    echo "Creating folder ${folder}."
    # Quote the path so folder names containing spaces work (SC2086 fix).
    if mkdir "${folder}"; then
      echo "Successfully created folder ${folder}."
    else
      echo "Failed to create folder ${folder}. Exiting."
      exit 1
    fi
  fi
}
download_file() {
  # Downloads a single file from a URL into a destination folder using curl.
  # Arguments:
  #   $1 - URL of the file to download
  #   $2 - destination folder (must already exist)
  local file_url=$1
  local folder=$2
  echo "Downloading file ${file_url} to folder ${folder}."
  # Run in a subshell so the working-directory change cannot leak out.
  # The original pushd/cd/popd sequence did not check the cd, so a failed
  # cd silently downloaded into the current directory; it also computed an
  # unused file_path local, which was removed.
  (
    cd "${folder}" || exit 1
    curl -Os "${file_url}"
  )
}
validate_md5_checksum() {
  # Validates the MD5 checksum of a downloaded file against the published
  # checksum file. Exits the script with status 1 on mismatch.
  # Arguments:
  #   $1 - path of the downloaded content file
  #   $2 - path of the file holding the expected MD5 checksum
  local content_file=$1
  local md5_file=$2
  local content_md5=""
  local expected_md5
  echo "Checking MD5 checksum of file ${content_file} against ${md5_file}"
  # Match any Linux OSTYPE (linux-gnu, linux-musl, ...), not just linux-gnu.
  if [[ "${OSTYPE}" == "linux"* ]]; then
    content_md5=$(md5sum "${content_file}")
  elif [[ "${OSTYPE}" == "darwin"* ]]; then
    content_md5=$(md5 -r "${content_file}")
  fi
  # Both md5sum and 'md5 -r' print "<checksum> <filename>"; keep field 1.
  content_md5=$(cut -d' ' -f1 <<<"${content_md5}")
  # Read the expected checksum straight from the md5 file. The original
  # used '<<<cat "${md5_file}"', a broken here-string that only worked
  # because cut ignores stdin when given a file operand.
  expected_md5=$(cut -d' ' -f1 "${md5_file}")
  # An empty content_md5 (unsupported OS or hashing failure) must fail.
  if [[ -n "${content_md5}" && "${content_md5}" == "${expected_md5}" ]]; then
    echo "Check passed."
  else
    echo "Check failed. MD5 checksums don't match. Exiting."
    exit 1
  fi
}
extract_tar_file() {
  # Extracts the content of a tar archive into a specified folder.
  # Arguments:
  #   $1 - path of the tar archive
  #   $2 - destination folder (must already exist)
  local tar_file=$1
  local folder=$2
  echo "Extracting file ${tar_file} to folder ${folder}"
  # Quote both paths so names containing spaces work (SC2086 fix).
  tar -C "${folder}" -xf "${tar_file}"
}
download_image_file() {
  # Downloads one numbered image tar file of a split, validates its MD5
  # checksum and extracts it into the split folder.
  # Arguments:
  #   $1 - split name ("train", "index" or "test")
  #   $2 - numeric index of the tar file to download
  #   $3 - destination folder of the split
  local split=$1
  local split_folder=$3
  local idx
  # %03d instead of %03g: the index is an integer, and %g is a
  # floating-point conversion that can switch to exponent notation.
  idx=$(printf "%03d" "$2")
  local images_tar_file="images_${idx}.tar"
  local images_tar_file_url="${images_tar_file_base_url}/${split}/${images_tar_file}"
  local images_tar_file_path="${split_folder}/${images_tar_file}"
  local images_md5_file="md5.images_${idx}.txt"
  local images_md5_file_url="${images_md5_file_base_url}/${split}/${images_md5_file}"
  local images_md5_file_path="${split_folder}/${images_md5_file}"
  download_file "${images_tar_file_url}" "${split_folder}"
  download_file "${images_md5_file_url}" "${split_folder}"
  validate_md5_checksum "${images_tar_file_path}" "${images_md5_file_path}"
  extract_tar_file "${images_tar_file_path}" "${split_folder}"
}
download_image_files() {
  # Downloads all image tar files of a split, ${num_processes} at a time,
  # waiting for each batch of background downloads before starting the next.
  # Arguments:
  #   $1 - split name
  #   $2 - destination folder of the split
  local split=$1
  local split_folder=$2
  # Indirect expansion: reads image_files_train/index/test set at the top.
  local image_files="image_files_${split}"
  local max_idx=$(( ${!image_files} - 1 ))
  # Fixed message typo: "form" -> "from".
  echo "Downloading ${!image_files} files from the split ${split} in the folder ${split_folder}."
  local i j last_idx
  for (( i = 0; i <= max_idx; i += num_processes )); do
    last_idx=$(( i + num_processes - 1 ))
    if (( last_idx > max_idx )); then
      last_idx=${max_idx}
    fi
    for (( j = i; j <= last_idx; j++ )); do
      download_image_file "${split}" "${j}" "${split_folder}" &
    done
    wait
  done
}
download_csv_files() {
  # Downloads every metadata CSV file of one split into its folder.
  # Arguments:
  #   $1 - split name
  #   $2 - destination folder of the split
  local split=$1
  local split_folder=$2
  # Indirect expansion of csv_train[*]/csv_index[*]/csv_test[*]; the result
  # is intentionally unquoted so it word-splits into individual URLs.
  local csv_list="csv_${split}[*]"
  local csv_file
  for csv_file in ${!csv_list}; do
    download_file "${csv_file}" "${split_folder}"
  done
}
download_split() {
  # Fetches every artifact of one split — metadata CSV files first, then
  # the image tar files — into ${dataset_root_folder}/<split>.
  # Arguments:
  #   $1 - split name
  local split_name=$1
  local target_folder="${dataset_root_folder}/${split_name}"
  make_folder "${target_folder}"
  download_csv_files "${split_name}" "${target_folder}"
  download_image_files "${split_name}" "${target_folder}"
}
download_all_splits() {
  # Creates the dataset root folder, then downloads every configured split
  # (metadata CSV files and image tar files).
  make_folder "${dataset_root_folder}"
  local current_split
  for current_split in "${splits[@]}"; do
    download_split "${current_split}"
  done
}
# Entry point: create the dataset root folder and download all splits.
download_all_splits
exit 0
#!/bin/bash
# Copyright 2020 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# This script installs the DELF package along with its dependencies. To install
# the DELF package run the script like in the following example:
# bash install_delf.sh
# Folder into which the Protobuf compiler archive is unpacked.
protoc_folder="protoc"
# Prebuilt protoc 3.3.0 release binary for 64-bit Linux.
protoc_url="https://github.com/google/protobuf/releases/download/v3.3.0/protoc-3.3.0-linux-x86_64.zip"
# TF-Slim source repository, cloned and installed by install_tf_slim.
tf_slim_git_repo="https://github.com/google-research/tf-slim.git"
handle_exit_code() {
  # Fails gracefully when a previous command exited with a non-zero status:
  # prints the error message and exits the script with status 1.
  # Arguments:
  #   $1 - exit code to check
  #   $2 - error message to print when the code is non-zero
  # Declared local (the original leaked both variables into global scope).
  local exit_code=$1
  local error_message=$2
  if [ "${exit_code}" -ne 0 ]; then
    echo "${error_message} Exiting."
    exit 1
  fi
}
install_tensorflow() {
  # Installs TensorFlow 2.2, both the CPU and the GPU builds, via pip3.
  local status
  echo "Installing TensorFlow 2.2"
  pip3 install --upgrade tensorflow==2.2.0
  status=$?
  handle_exit_code "${status}" "Unable to install Tensorflow 2.2."
  echo "Installing TensorFlow 2.2 for GPU"
  pip3 install --upgrade tensorflow-gpu==2.2.0
  status=$?
  handle_exit_code "${status}" "Unable to install Tensorflow for GPU 2.2.0."
}
install_tf_slim() {
  # Installs TF-Slim from source by cloning its Git repository, pip-installing
  # it, and removing the clone afterwards.
  # Fixed: the progress message referenced the undefined ${git_repo};
  # it now reports ${tf_slim_git_repo}.
  echo "Installing TF-Slim from source: ${tf_slim_git_repo}"
  git clone "${tf_slim_git_repo}"
  local exit_code=$?
  handle_exit_code "${exit_code}" "Unable to clone TF-Slim repository ${tf_slim_git_repo}."
  pushd . > /dev/null
  cd tf-slim
  pip3 install .
  popd > /dev/null
  rm -rf tf-slim
}
download_protoc() {
  # Downloads the Protobuf compiler (protoc) release archive and unpacks it
  # into ${protoc_folder}, then deletes the archive.
  # Fixed: the download-failure message wrongly referenced
  # ${tf_slim_git_repo}; it now reports ${protoc_url}.
  echo "Downloading Protobuf compiler from ${protoc_url}"
  curl -L -Os "${protoc_url}"
  local exit_code=$?
  handle_exit_code "${exit_code}" "Unable to download Protobuf compiler from ${protoc_url}."
  mkdir "${protoc_folder}"
  local protoc_archive
  protoc_archive=$(basename "${protoc_url}")
  unzip "${protoc_archive}" -d "${protoc_folder}"
  exit_code=$?
  handle_exit_code "${exit_code}" "Unable to unzip Protobuf compiler from ${protoc_archive}."
  rm "${protoc_archive}"
}
compile_delf_protos() {
  # Compiles the DELF protocol buffers under tensorflow/models/research/delf
  # using the protoc compiler downloaded by download_protoc.
  echo "Compiling DELF Protobufs"
  # Resolve the compiler path before leaving the current directory.
  PATH_TO_PROTOC="$(pwd)/${protoc_folder}"
  pushd . > /dev/null
  cd ../../..
  "${PATH_TO_PROTOC}/bin/protoc" delf/protos/*.proto --python_out=.
  local status=$?
  handle_exit_code "${status}" "Unable to compile DELF Protobufs."
  popd > /dev/null
}
cleanup_protoc() {
  # Deletes the downloaded Protobuf compiler once DELF is installed.
  echo "Cleaning up Protobuf compiler download"
  rm -rf "${protoc_folder}"
}
install_python_libraries() {
  # Installs the Python libraries the DELF package depends on, plus the
  # python3-tk system package (via apt-get).
  local status
  echo "Installing matplotlib, numpy, scikit-image, scipy and python3-tk"
  pip3 install matplotlib numpy scikit-image scipy
  status=$?
  handle_exit_code "${status}" "Unable to install at least one of: matplotlib numpy scikit-image scipy."
  sudo apt-get -y install python3-tk
  status=$?
  handle_exit_code "${status}" "Unable to install python3-tk."
}
install_object_detection() {
  # Installs the object detection package from tensorflow/models/research
  # and appends that directory to PYTHONPATH.
  echo "Installing object detection"
  pushd . > /dev/null
  cd ../../../..
  export PYTHONPATH="${PYTHONPATH}:$(pwd)"
  pip3 install .
  local status=$?
  handle_exit_code "${status}" "Unable to install the object_detection package."
  popd > /dev/null
}
install_delf_package() {
  # Installs the DELF package itself, as an editable install, from
  # tensorflow/models/research/delf/delf.
  echo "Installing DELF package"
  pushd . > /dev/null
  cd ../../..
  pip3 install -e .
  local status=$?
  handle_exit_code "${status}" "Unable to install the DELF package."
  popd > /dev/null
}
post_install_check() {
  # Verifies the DELF package can actually be imported from Python.
  echo "Checking DELF package installation"
  python3 -c 'import delf'
  local status=$?
  handle_exit_code "${status}" "DELF package installation check failed."
  echo "Installation successful."
}
install_delf() {
  # Orchestrates the full DELF installation, step by step.
  install_tensorflow        # TensorFlow 2.2 (CPU + GPU builds)
  install_tf_slim           # TF-Slim from source
  download_protoc           # fetch the Protobuf compiler
  compile_delf_protos       # compile the DELF .proto files
  cleanup_protoc            # remove the downloaded compiler
  install_python_libraries  # matplotlib, numpy, scikit-image, scipy, tk
  install_object_detection  # object detection package
  install_delf_package      # the DELF package itself
  post_install_check        # final import check
}
# Entry point: run the full DELF installation sequence.
install_delf
exit 0
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment