#!/bin/bash
#
# Copyright 2021 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Downloads and decompresses the UniProt archive (uniprot.tar.gz) for
# AlphaFold-Multimer from a mirror, in place of the upstream SwissProt and
# TrEMBL FASTA downloads.
#
# Usage: bash download_uniprot.sh /path/to/download/directory
# Abort on the first failing command, on use of an unset variable, and on a
# failure anywhere in a pipeline.
set -euo pipefail

# The only argument is the directory under which "uniprot/" is created.
if [[ $# -eq 0 ]]; then
    echo "Error: download directory must be provided as an input argument." >&2
    exit 1
fi

if ! command -v aria2c &> /dev/null ; then
    echo "Error: aria2c could not be found. Please install aria2c (sudo apt install aria2)." >&2
    exit 1
fi

DOWNLOAD_DIR="$1"
ROOT_DIR="${DOWNLOAD_DIR}/uniprot"

# Upstream sources, kept for reference. They were downloaded separately,
# gunzipped and concatenated into "${ROOT_DIR}/uniprot.fasta":
#   ftp://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_trembl.fasta.gz
#   ftp://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz
# The mirror below serves a single tarball instead.
UNIPROT_SOURCE_URL="http://113.200.138.88:18080/aidatasets/project-dependency/alphafold/-/raw/master/uniprot.tar.gz?ref_type=heads&inline=false"

# Drop the "?ref_type=...&inline=..." query string before taking the basename;
# otherwise the computed name would not match any file on disk.
UNIPROT_BASENAME=$(basename "${UNIPROT_SOURCE_URL%%\?*}")

mkdir --parents "${ROOT_DIR}"

# --out pins the local file name (relative to --dir) so it does not depend on
# how aria2c derives a name from the URL or the response headers.
aria2c "${UNIPROT_SOURCE_URL}" --dir="${ROOT_DIR}" --out="${UNIPROT_BASENAME}"

# The download is a gzip-compressed tar archive, so it has to be extracted
# rather than merely gunzipped. Every path is built from ROOT_DIR, so no
# directory change is needed and a relative DOWNLOAD_DIR works as well.
tar -xzf "${ROOT_DIR}/${UNIPROT_BASENAME}" -C "${ROOT_DIR}"

# Like gunzip, do not keep the compressed archive once it has been unpacked.
rm -- "${ROOT_DIR}/${UNIPROT_BASENAME}"

# NOTE(review): the layout inside the tarball is not known from this script.
# The upstream flow produced "${ROOT_DIR}/uniprot.fasta"; warn (without
# failing) if extraction did not put a file there so the layout can be fixed.
if [[ ! -f "${ROOT_DIR}/uniprot.fasta" ]]; then
    echo "Warning: ${ROOT_DIR}/uniprot.fasta not found after extraction; check the archive layout." >&2
fi