Unverified Commit bb3f51e5 authored by Christina Floristean's avatar Christina Floristean Committed by GitHub
Browse files

Merge pull request #405 from aqlaboratory/multimer

Full multimer merge
parents ce211367 c33a0bd6
......@@ -31,7 +31,7 @@ fi
DOWNLOAD_DIR="$1"
ROOT_DIR="${DOWNLOAD_DIR}/params"
SOURCE_URL="https://storage.googleapis.com/alphafold/alphafold_params_2022-01-19.tar"
SOURCE_URL="https://storage.googleapis.com/alphafold/alphafold_params_2022-12-06.tar"
BASENAME=$(basename "${SOURCE_URL}")
mkdir --parents "${ROOT_DIR}"
......
......@@ -32,8 +32,8 @@ fi
DOWNLOAD_DIR="$1"
ROOT_DIR="${DOWNLOAD_DIR}/mgnify"
# Mirror of:
# ftp://ftp.ebi.ac.uk/pub/databases/metagenomics/peptide_database/2018_12/mgy_clusters.fa.gz
SOURCE_URL="https://storage.googleapis.com/alphafold-databases/casp14_versions/mgy_clusters_2018_12.fa.gz"
# ftp://ftp.ebi.ac.uk/pub/databases/metagenomics/peptide_database/2022_05/mgy_clusters.fa.gz
SOURCE_URL="https://storage.googleapis.com/alphafold-databases/v2.3/mgy_clusters_2022_05.fa.gz"
BASENAME=$(basename "${SOURCE_URL}")
mkdir --parents "${ROOT_DIR}"
......
#!/bin/bash
#
# Copyright 2021 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Downloads and unzips the PDB SeqRes database for AlphaFold.
#
# Usage: bash download_pdb_seqres.sh /path/to/download/directory
set -e
if [[ $# -eq 0 ]]; then
echo "Error: download directory must be provided as an input argument."
exit 1
fi
if ! command -v aria2c &> /dev/null ; then
echo "Error: aria2c could not be found. Please install aria2c (sudo apt install aria2)."
exit 1
fi
DOWNLOAD_DIR="$1"
ROOT_DIR="${DOWNLOAD_DIR}/pdb_seqres"
SOURCE_URL="ftp://ftp.wwpdb.org/pub/pdb/derived_data/pdb_seqres.txt"
BASENAME=$(basename "${SOURCE_URL}")
mkdir --parents "${ROOT_DIR}"
aria2c "${SOURCE_URL}" --dir="${ROOT_DIR}"
# Keep only protein sequences.
grep --after-context=1 --no-group-separator '>.* mol:protein' "${ROOT_DIR}/pdb_seqres.txt" > "${ROOT_DIR}/pdb_seqres_filtered.txt"
mv "${ROOT_DIR}/pdb_seqres_filtered.txt" "${ROOT_DIR}/pdb_seqres.txt"
#!/bin/bash
#
# Copyright 2021 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Downloads, unzips and merges the SwissProt and TrEMBL databases for
# AlphaFold-Multimer.
#
# Usage: bash download_uniprot.sh /path/to/download/directory
set -e
if [[ $# -eq 0 ]]; then
echo "Error: download directory must be provided as an input argument."
exit 1
fi
if ! command -v aria2c &> /dev/null ; then
echo "Error: aria2c could not be found. Please install aria2c (sudo apt install aria2)."
exit 1
fi
DOWNLOAD_DIR="$1"
ROOT_DIR="${DOWNLOAD_DIR}/uniprot"
TREMBL_SOURCE_URL="ftp://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_trembl.fasta.gz"
TREMBL_BASENAME=$(basename "${TREMBL_SOURCE_URL}")
TREMBL_UNZIPPED_BASENAME="${TREMBL_BASENAME%.gz}"
SPROT_SOURCE_URL="ftp://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz"
SPROT_BASENAME=$(basename "${SPROT_SOURCE_URL}")
SPROT_UNZIPPED_BASENAME="${SPROT_BASENAME%.gz}"
mkdir --parents "${ROOT_DIR}"
aria2c "${TREMBL_SOURCE_URL}" --dir="${ROOT_DIR}"
aria2c "${SPROT_SOURCE_URL}" --dir="${ROOT_DIR}"
pushd "${ROOT_DIR}"
gunzip "${ROOT_DIR}/${TREMBL_BASENAME}"
gunzip "${ROOT_DIR}/${SPROT_BASENAME}"
# Concatenate TrEMBL and SwissProt, rename to uniprot and clean up.
cat "${ROOT_DIR}/${SPROT_UNZIPPED_BASENAME}" >> "${ROOT_DIR}/${TREMBL_UNZIPPED_BASENAME}"
mv "${ROOT_DIR}/${TREMBL_UNZIPPED_BASENAME}" "${ROOT_DIR}/uniprot.fasta"
rm "${ROOT_DIR}/${SPROT_UNZIPPED_BASENAME}"
popd
......@@ -30,10 +30,15 @@ if ! command -v aria2c &> /dev/null ; then
fi
DOWNLOAD_DIR="$1"
ROOT_DIR="${DOWNLOAD_DIR}"
SOURCE_URL="http://wwwuser.gwdg.de/~compbiol/colabfold/uniref30_2103.tar.gz"
ROOT_DIR="${DOWNLOAD_DIR}/uniref30"
# Mirror of:
# https://wwwuser.gwdg.de/~compbiol/uniclust/2021_03/UniRef30_2021_03.tar.gz
SOURCE_URL="https://storage.googleapis.com/alphafold-databases/v2.3/UniRef30_2021_03.tar.gz"
BASENAME=$(basename "${SOURCE_URL}")
mkdir --parents "${ROOT_DIR}"
aria2c "${SOURCE_URL}" --dir="${ROOT_DIR}" -x 4 --check-certificate=false
gunzip "${ROOT_DIR}/${BASENAME}"
tar --extract --verbose --file="${ROOT_DIR}/${BASENAME}" \
--directory="${ROOT_DIR}"
rm "${ROOT_DIR}/${BASENAME}"
......@@ -26,7 +26,12 @@ mkdir -p "${ALIGNMENT_DIR}"
for chain_dir in $(ls "${RODA_DIR}"); do
CHAIN_DIR_PATH="${RODA_DIR}/${chain_dir}"
for subdir in $(ls "${CHAIN_DIR_PATH}"); do
if [[ $subdir = "pdb" ]] || [[ $subdir = "cif" ]]; then
if [[ ! -d "$subdir" ]]; then
echo "$subdir is not directory"
continue
elif [[ -z $(ls "${subdir}")]]; then
continue
elif [[ $subdir = "pdb" ]] || [[ $subdir = "cif" ]]; then
mv "${CHAIN_DIR_PATH}/${subdir}"/* "${DATA_DIR}"
else
CHAIN_ALIGNMENT_DIR="${ALIGNMENT_DIR}/${chain_dir}"
......
......@@ -2,35 +2,62 @@ import argparse
import os
import pickle
from alphafold.data import pipeline, templates
from alphafold.data import pipeline, pipeline_multimer, templates
from alphafold.data.tools import hmmsearch, hhsearch
from scripts.utils import add_data_args
def main(args):
template_featurizer = templates.TemplateHitFeaturizer(
mmcif_dir=args.mmcif_dir,
if (args.multimer):
template_searcher = hmmsearch.Hmmsearch(
binary_path=args.hmmsearch_binary_path,
hmmbuild_binary_path=args.hmmbuild_binary_path,
database_path=args.pdb_seqres_database_path,
)
template_featurizer = templates.HmmsearchHitFeaturizer(
mmcif_dir=args.template_mmcif_dir,
max_template_date=args.max_template_date,
max_hits=20,
kalign_binary_path=args.kalign_binary_path,
release_dates_path=args.release_dates_path,
obsolete_pdbs_path=args.obsolete_pdbs_path
)
else:
template_searcher = hhsearch.HHSearch(
binary_path=args.hhsearch_binary_path,
databases=[args.pdb70_database_path],
)
template_featurizer = templates.HhsearchHitFeaturizer(
mmcif_dir=args.template_mmcif_dir,
max_template_date=args.max_template_date,
max_hits=20,
kalign_binary_path=args.kalign_binary_path,
release_dates_path=None,
obsolete_pdbs_path=args.obsolete_pdbs_path,
obsolete_pdbs_path=args.obsolete_pdbs_path
)
data_pipeline = pipeline.DataPipeline(
jackhmmer_binary_path=args.jackhmmer_binary_path,
hhblits_binary_path=args.hhblits_binary_path,
hhsearch_binary_path=args.hhsearch_binary_path,
uniref90_database_path=args.uniref90_database_path,
mgnify_database_path=args.mgnify_database_path,
bfd_database_path=args.bfd_database_path,
uniclust30_database_path=args.uniclust30_database_path,
pdb70_database_path=args.pdb70_database_path,
uniref30_database_path=args.uniref30_database_path,
small_bfd_database_path=None,
template_featurizer=template_featurizer,
template_searcher=template_searcher,
use_small_bfd=False,
)
if (args.multimer):
data_pipeline = pipeline_multimer.DataPipeline(
monomer_data_pipeline=data_pipeline,
jackhmmer_binary_path=args.jackhmmer_binary_path,
uniprot_database_path=args.uniprot_database_path)
feature_dict = data_pipeline.process(
input_fasta_path=args.fasta_path,
msa_output_dir=args.output_dir,
......@@ -44,6 +71,7 @@ if __name__ == "__main__":
parser.add_argument("fasta_path", type=str)
parser.add_argument("mmcif_dir", type=str)
parser.add_argument("output_dir", type=str)
parser.add_argument("--multimer", action='store_true')
add_data_args(parser)
args = parser.parse_args()
......
......@@ -4,10 +4,11 @@ import json
import logging
from multiprocessing import Pool
import os
import string
import sys
sys.path.append(".") # an innocent hack to get this to run from the top level
from collections import defaultdict
from tqdm import tqdm
from openfold.data.mmcif_parsing import parse
......@@ -49,20 +50,27 @@ def parse_file(
pdb_string = fp.read()
protein_object = protein.from_pdb_string(pdb_string, None)
aatype = protein_object.aatype
chain_index = protein_object.chain_index
chain_dict = {}
chain_dict["seq"] = residue_constants.aatype_to_str_sequence(
protein_object.aatype,
)
chain_dict["resolution"] = 0.
chain_dict = defaultdict(list)
for i in range(aatype.shape[0]):
chain_dict[chain_index[i]].append(residue_constants.restypes_with_x[aatype[i]])
out = {}
chain_tags = string.ascii_uppercase
for chain, seq in chain_dict.items():
full_name = "_".join([file_id, chain_tags[chain]])
out[full_name] = {}
local_data = out[full_name]
local_data["resolution"] = 0.
local_data["seq"] = ''.join(seq)
if(chain_cluster_size_dict is not None):
cluster_size = chain_cluster_size_dict.get(
full_name.upper(), -1
)
chain_dict["cluster_size"] = cluster_size
out = {file_id: chain_dict}
local_data["cluster_size"] = cluster_size
return out
......
......@@ -13,7 +13,7 @@ from tqdm import tqdm
from openfold.data.mmcif_parsing import parse
def parse_file(f, args):
def parse_file(f, args, chain_cluster_size_dict=None):
with open(os.path.join(args.mmcif_dir, f), "r") as fp:
mmcif_string = fp.read()
file_id = os.path.splitext(f)[0]
......@@ -28,6 +28,18 @@ def parse_file(f, args):
local_data["release_date"] = mmcif.header["release_date"]
chain_ids, seqs = list(zip(*mmcif.chain_to_seqres.items()))
if chain_cluster_size_dict is not None:
cluster_sizes = []
for chain_id in chain_ids:
full_name = "_".join([file_id, chain_id])
cluster_size = chain_cluster_size_dict.get(
full_name.upper(), -1
)
cluster_sizes.append(cluster_size)
local_data["cluster_sizes"] = cluster_sizes
local_data["chain_ids"] = chain_ids
local_data["seqs"] = seqs
local_data["no_chains"] = len(chain_ids)
......@@ -38,8 +50,21 @@ def parse_file(f, args):
def main(args):
chain_cluster_size_dict = None
if args.cluster_file is not None:
chain_cluster_size_dict = {}
with open(args.cluster_file, "r") as fp:
clusters = [l.strip() for l in fp.readlines()]
for cluster in clusters:
chain_ids = cluster.split()
cluster_len = len(chain_ids)
for chain_id in chain_ids:
chain_id = chain_id.upper()
chain_cluster_size_dict[chain_id] = cluster_len
files = [f for f in os.listdir(args.mmcif_dir) if ".cif" in f]
fn = partial(parse_file, args=args)
fn = partial(parse_file, args=args, chain_cluster_size_dict=chain_cluster_size_dict)
data = {}
with Pool(processes=args.no_workers) as p:
with tqdm(total=len(files)) as pbar:
......@@ -63,6 +88,15 @@ if __name__ == "__main__":
"--no_workers", type=int, default=4,
help="Number of workers to use for parsing"
)
parser.add_argument(
"--cluster_file", type=str, default=None,
help=(
"Path to a cluster file (e.g. PDB40), one cluster "
"({PROT1_ID}_{CHAIN_ID} {PROT2_ID}_{CHAIN_ID} ...) per line. "
"Chains not in this cluster file will NOT be filtered by cluster "
"size."
)
)
parser.add_argument(
"--chunksize", type=int, default=10,
help="How many files should be distributed to each worker at a time"
......
......@@ -11,6 +11,7 @@ import tempfile
import openfold.data.mmcif_parsing as mmcif_parsing
from openfold.data.data_pipeline import AlignmentRunner
from openfold.data.parsers import parse_fasta
from openfold.data.tools import hhsearch, hmmsearch
from openfold.np import protein, residue_constants
from utils import add_data_args
......@@ -39,7 +40,8 @@ def run_seq_group_alignments(seq_groups, alignment_runner, args):
alignment_runner.run(
fasta_path, alignment_dir
)
except:
except Exception as e:
logging.warning(e)
logging.warning(f"Failed to run alignments for {first_name}. Skipping...")
os.remove(fasta_path)
os.rmdir(alignment_dir)
......@@ -114,15 +116,30 @@ def parse_and_align(files, alignment_runner, args):
def main(args):
# Build the alignment tool runner
if args.hmmsearch_binary_path is not None and args.pdb_seqres_database_path is not None:
template_searcher = hmmsearch.Hmmsearch(
binary_path=args.hmmsearch_binary_path,
hmmbuild_binary_path=args.hmmbuild_binary_path,
database_path=args.pdb_seqres_database_path,
)
elif args.hhsearch_binary_path is not None and args.pdb70_database_path is not None:
template_searcher = hhsearch.HHSearch(
binary_path=args.hhsearch_binary_path,
databases=[args.pdb70_database_path],
)
else:
template_searcher = None
alignment_runner = AlignmentRunner(
jackhmmer_binary_path=args.jackhmmer_binary_path,
hhblits_binary_path=args.hhblits_binary_path,
hhsearch_binary_path=args.hhsearch_binary_path,
uniref90_database_path=args.uniref90_database_path,
mgnify_database_path=args.mgnify_database_path,
bfd_database_path=args.bfd_database_path,
uniref30_database_path=args.uniref30_database_path,
uniclust30_database_path=args.uniclust30_database_path,
pdb70_database_path=args.pdb70_database_path,
uniprot_database_path=args.uniprot_database_path,
template_searcher=template_searcher,
use_small_bfd=args.bfd_database_path is None,
no_cpus=args.cpus_per_task,
)
......
......@@ -14,9 +14,18 @@ def add_data_args(parser: argparse.ArgumentParser):
parser.add_argument(
'--pdb70_database_path', type=str, default=None,
)
parser.add_argument(
'--pdb_seqres_database_path', type=str, default=None,
)
parser.add_argument(
'--uniref30_database_path', type=str, default=None,
)
parser.add_argument(
'--uniclust30_database_path', type=str, default=None,
)
parser.add_argument(
'--uniprot_database_path', type=str, default=None,
)
parser.add_argument(
'--bfd_database_path', type=str, default=None,
)
......@@ -29,6 +38,12 @@ def add_data_args(parser: argparse.ArgumentParser):
parser.add_argument(
'--hhsearch_binary_path', type=str, default='/usr/bin/hhsearch'
)
parser.add_argument(
'--hmmsearch_binary_path', type=str, default='/usr/bin/hmmsearch'
)
parser.add_argument(
'--hmmbuild_binary_path', type=str, default='/usr/bin/hmmbuild'
)
parser.add_argument(
'--kalign_binary_path', type=str, default='/usr/bin/kalign'
)
......
......@@ -75,6 +75,8 @@ for major, minor in list(compute_capabilities):
extra_cuda_flags += cc_flag
cc_flag = ['-gencode', 'arch=compute_70,code=sm_70']
if bare_metal_major != -1:
modules = [CUDAExtension(
name="attn_core_inplace_cuda",
......@@ -111,10 +113,10 @@ else:
setup(
name='openfold',
version='1.0.1',
version='2.0.0',
description='A PyTorch reimplementation of DeepMind\'s AlphaFold 2',
author='Gustaf Ahdritz & DeepMind',
author_email='gahdritz@gmail.com',
author='OpenFold Team',
author_email='jennifer.wei@omsf.io',
license='Apache License, Version 2.0',
url='https://github.com/aqlaboratory/openfold',
packages=find_packages(exclude=["tests", "scripts"]),
......
......@@ -6,11 +6,14 @@ import sys
import unittest
import numpy as np
import torch
from openfold.config import model_config
from openfold.model.model import AlphaFold
from openfold.utils.import_weights import import_jax_weights_
from tests.config import consts
# Give JAX some GPU memory discipline
# (by default it hogs 90% of GPU memory. This disables that behavior and also
# forces it to proactively free memory that it allocates)
......@@ -57,26 +60,27 @@ def import_alphafold():
def get_alphafold_config():
config = alphafold.model.config.model_config("model_1_ptm") # noqa
config = alphafold.model.config.model_config(consts.model) # noqa
config.model.global_config.deterministic = True
return config
_param_path = "openfold/resources/params/params_model_1_ptm.npz"
dir_path = os.path.dirname(os.path.realpath(__file__))
_param_path = os.path.join(dir_path, "..", f"openfold/resources/params/params_{consts.model}.npz")
_model = None
def get_global_pretrained_openfold():
global _model
if _model is None:
_model = AlphaFold(model_config("model_1_ptm"))
_model = AlphaFold(model_config(consts.model))
_model = _model.eval()
if not os.path.exists(_param_path):
raise FileNotFoundError(
"""Cannot load pretrained parameters. Make sure to run the
installation script before running tests."""
)
import_jax_weights_(_model, _param_path, version="model_1_ptm")
import_jax_weights_(_model, _param_path, version=consts.model)
_model = _model.cuda()
return _model
......@@ -97,7 +101,7 @@ def _remove_key_prefix(d, prefix):
for k, v in list(d.items()):
if k.startswith(prefix):
d.pop(k)
d[k[len(prefix) :]] = v
d[k[len(prefix):]] = v
def fetch_alphafold_module_weights(weight_path):
......@@ -106,7 +110,6 @@ def fetch_alphafold_module_weights(weight_path):
if "/" in weight_path:
spl = weight_path.split("/")
spl = spl if len(spl[-1]) != 0 else spl[:-1]
module_name = spl[-1]
prefix = "/".join(spl[:-1]) + "/"
_remove_key_prefix(params, prefix)
......@@ -117,3 +120,20 @@ def fetch_alphafold_module_weights(weight_path):
"Make sure to call import_alphafold before running this function"
)
return params
def _assert_abs_diff_small_base(compare_func, expected, actual, eps):
# Helper function for comparing absolute differences of two torch tensors.
abs_diff = torch.abs(expected - actual)
err = compare_func(abs_diff)
zero_tensor = torch.tensor(0, dtype=err.dtype)
rtol = 1.6e-2 if err.dtype == torch.bfloat16 else 1.3e-6
torch.testing.assert_close(err, zero_tensor, atol=eps, rtol=rtol)
def assert_max_abs_diff_small(expected, actual, eps):
_assert_abs_diff_small_base(torch.max, expected, actual, eps)
def assert_mean_abs_diff_small(expected, actual, eps):
_assert_abs_diff_small_base(torch.mean, expected, actual, eps)
import ml_collections as mlc
consts = mlc.ConfigDict(
monomer_consts = mlc.ConfigDict(
{
"model": "model_1_ptm", # monomer:model_1_ptm, multimer: model_1_multimer_v3
"is_multimer": False, # monomer: False, multimer: True
"chunk_size": 4,
"batch_size": 2,
"n_res": 11,
"n_res": 22,
"n_seq": 13,
"n_templ": 3,
"n_extra": 17,
......@@ -16,9 +20,37 @@ consts = mlc.ConfigDict(
"c_s": 384,
"c_t": 64,
"c_e": 64,
"msa_logits": 23, # monomer: 23, multimer: 22
"template_mmcif_dir": None # Set for test_multimer_datamodule
}
)
multimer_consts = mlc.ConfigDict(
{
"model": "model_1_multimer_v3", # monomer:model_1_ptm, multimer: model_1_multimer_v3
"is_multimer": True, # monomer: False, multimer: True
"chunk_size": 4,
"batch_size": 2,
"n_res": 22,
"n_seq": 13,
"n_templ": 3,
"n_extra": 17,
"n_heads_extra_msa": 8,
"eps": 5e-4,
# For compatibility with DeepMind's pretrained weights, it's easiest for
# everyone if these take their real values.
"c_m": 256,
"c_z": 128,
"c_s": 384,
"c_t": 64,
"c_e": 64,
"msa_logits": 22, # monomer: 23, multimer: 22
"template_mmcif_dir": None # Set for test_multimer_datamodule
}
)
consts = monomer_consts
config = mlc.ConfigDict(
{
"data": {
......
......@@ -12,10 +12,37 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from random import randint
import torch
import numpy as np
from scipy.spatial.transform import Rotation
from tests.config import consts
def random_asym_ids(n_res, split_chains=True, min_chain_len=4):
n_chain = randint(1, n_res // min_chain_len) if consts.is_multimer else 1
if not split_chains:
return [0] * n_res
assert n_res >= n_chain
pieces = []
asym_ids = []
final_idx = n_chain - 1
for idx in range(n_chain - 1):
n_stop = (n_res - sum(pieces) - n_chain + idx - min_chain_len)
if n_stop <= min_chain_len:
final_idx = idx
break
piece = randint(min_chain_len, n_stop)
pieces.append(piece)
asym_ids.extend(piece * [idx])
asym_ids.extend((n_res - sum(pieces)) * [final_idx])
return np.array(asym_ids).astype(np.float32) + 1
def random_template_feats(n_templ, n, batch_size=None):
b = []
......@@ -40,6 +67,11 @@ def random_template_feats(n_templ, n, batch_size=None):
}
batch = {k: v.astype(np.float32) for k, v in batch.items()}
batch["template_aatype"] = batch["template_aatype"].astype(np.int64)
if consts.is_multimer:
asym_ids = np.array(random_asym_ids(n))
batch["asym_id"] = np.tile(asym_ids[np.newaxis, :], (*b, n_templ, 1))
return batch
......
>query
MGSSHHHHHHSSGLVPGSHMDKKETKHLLKIKKEDYPQIFDFLENVPRGTKTAHIREALRRYIEEIGENP
>tr|A0A2W3M096|A0A2W3M096_STAAU Plasmid segregation protein ParR OS=Staphylococcus aureus OX=1280 GN=C7Q70_14145 PE=4 SV=1
-------------------MDKKETQHLLKIKKQDYPQIFNFLEGLPKGTKTAHIREALMRYIAEEGNTP
>tr|A0A0Q9XW80|A0A0Q9XW80_9STAP Uncharacterized protein OS=Staphylococcus sp. NAM3COL9 GN=ACA31_00310 PE=4 SV=1
-------------------MSKQETNHLLKIKKKDYPQIFEFLEGVPKGTKTAHIREALLRYIEELGAPP
>tr|A0A1E5U0W4|A0A1E5U0W4_STAXY Uncharacterized protein OS=Staphylococcus xylosus GN=AST15_04830 PE=4 SV=1
-------------------MSKQETNHLLKIKKKDYPQIFDFLENVPKGTKTAHIREALIRYINDLGDTpP
This source diff could not be displayed because it is too large. You can view the blob instead.
# STOCKHOLM 1.0
#=GS MGYP000048211747/1-51 DE [subseq from] PL=00 UP=0 BIOMES=0000000011000
#=GS MGYP000256545448/1-51 DE [subseq from] PL=00 UP=0 BIOMES=0000000011000
#=GS MGYP000517307434/104-157 DE [subseq from] PL=11 UP=0 BIOMES=0000000011000
#=GS MGYP000971940026/195-224 DE [subseq from] PL=10 UP=0 BIOMES=0110000000000
#=GS MGYP000859660985/46-74 DE [subseq from] PL=10 UP=0 BIOMES=0110000000000
#=GS MGYP000859660985/83-111 DE [subseq from] PL=10 UP=0 BIOMES=0110000000000
query MGSSHHHHHHSSGLVPGSHMDKKETKHLLKIKKEDYPQIFDFLENVPRGTKTAHIREALRRYIEEIGENP
MGYP000048211747/1-51 -------------------MDKKETKHLLKIKKEDYPQIFDFLENVPRGTKTAHIREALRRYIEEIGENP
MGYP000256545448/1-51 -------------------MKKKETQHLLKIKKEDYPQIFDFLEGLPRGTKTAHIREALLRYIADEGENP
MGYP000517307434/104-157 ----------------GDLLRQKETQHLLKIKKEDYPQIFDFLEGLPRGTKTAHIREALLRYIADEGENP
MGYP000971940026/195-224 ------------------------------VKKSDLGQVTSFLKEVPEGKKQDVLDEVLK----------
MGYP000859660985/46-74 ------------------------------IKKSDLGQVASFLKEVPEGQKQEVLDQVL-----------
MGYP000859660985/83-111 ------------------------------IKKSDLGQVASFLKEVPEGQKQEVLDQVL-----------
#=GC RF xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
//
# STOCKHOLM 1.0
#=GS tr|A0A0K0ME10|A0A0K0ME10_9STAP/1-51 DE [subseq from] Plasmid segregation protein ParR OS=Staphylococcus schleiferi OX=1295 GN=NP71_p00120 PE=4 SV=1
#=GS tr|A0A0C5BVQ8|A0A0C5BVQ8_STAAU/1-51 DE [subseq from] Plasmid segregation protein ParR OS=Staphylococcus aureus OX=1280 GN=parR PE=4 SV=1
#=GS tr|A0A0D4ZYK6|A0A0D4ZYK6_STAEP/1-51 DE [subseq from] DNA-binding protein ParR OS=Staphylococcus epidermidis OX=1282 GN=parR PE=4 SV=1
#=GS tr|A0A0H2XKQ4|A0A0H2XKQ4_STAA3/1-51 DE [subseq from] Plasmid segregation protein ParR OS=Staphylococcus aureus (strain USA300) OX=367830 GN=SAUSA300_pUSA030035 PE=4 SV=1
#=GS tr|A0A0N9NJL4|A0A0N9NJL4_STAPS/1-51 DE [subseq from] Putative plasmid segregation protein ParR OS=Staphylococcus pseudintermedius OX=283734 GN=parR PE=4 SV=1
#=GS tr|A0A0U2CJ65|A0A0U2CJ65_STAEP/1-51 DE [subseq from] Plasmid segregation protein OS=Staphylococcus epidermidis OX=1282 GN=parR PE=4 SV=1
#=GS tr|A0A133QXU6|A0A133QXU6_STASI/1-51 DE [subseq from] Plasmid segregation protein ParR OS=Staphylococcus simulans OX=1286 GN=HMPREF3215_00002 PE=4 SV=1
#=GS tr|A0A141BHY3|A0A141BHY3_STAXY/1-51 DE [subseq from] DNA-binding protein OS=Staphylococcus xylosus OX=1288 GN=p11 PE=4 SV=1
#=GS tr|A0A141HMK9|A0A141HMK9_STAA8/1-51 DE [subseq from] ParR OS=Staphylococcus aureus subsp. aureus RN4220 OX=561307 GN=pGO400_p33 PE=4 SV=1
#=GS tr|A0A1B1UXS0|A0A1B1UXS0_STALU/1-51 DE [subseq from] Plasmid segregation protein ParR OS=Staphylococcus lugdunensis OX=28035 GN=parR PE=4 SV=1
#=GS tr|A0A1S7BGJ1|A0A1S7BGJ1_STAAU/1-51 DE [subseq from] DNA-binding protein ParR OS=Staphylococcus aureus OX=1280 GN=parR PE=4 SV=1
#=GS tr|A0A418HED5|A0A418HED5_STAGA/1-51 DE [subseq from] Plasmid segregation protein ParR OS=Staphylococcus gallinarum OX=1293 GN=BUY97_07835 PE=4 SV=1
#=GS tr|A0A507SJ94|A0A507SJ94_9STAP/1-51 DE [subseq from] Plasmid segregation protein ParR OS=Staphylococcus sp. SKL71187 OX=2497688 GN=EKV43_01520 PE=4 SV=1
#=GS tr|A0A6N0I4W4|A0A6N0I4W4_STAHO/1-51 DE [subseq from] Plasmid segregation protein ParR OS=Staphylococcus hominis OX=1290 GN=FOB69_12695 PE=4 SV=1
#=GS tr|A0A7G3T6L6|A0A7G3T6L6_9STAP/1-51 DE [subseq from] Plasmid segregation protein OS=Staphylococcus equorum OX=246432 PE=4 SV=1
#=GS tr|A0A848F022|A0A848F022_STACP/1-51 DE [subseq from] Plasmid segregation protein ParR OS=Staphylococcus capitis OX=29388 GN=HHM13_04665 PE=4 SV=1
#=GS tr|O87365|O87365_STAAU/1-51 DE [subseq from] Conserved domain protein OS=Staphylococcus aureus OX=1280 GN=parR PE=1 SV=1
#=GS tr|A0A7G3L2E1|A0A7G3L2E1_STAAU/1-51 DE [subseq from] Plasmid segregation protein ParR OS=Staphylococcus aureus OX=1280 PE=4 SV=1
#=GS tr|E4PYH1|E4PYH1_STAAU/1-39 DE [subseq from] DUF655 domain-containing protein OS=Staphylococcus aureus OX=1280 GN=SUM_0041p2 PE=4 SV=1
#=GS tr|A0A0Q9XW80|A0A0Q9XW80_9STAP/1-51 DE [subseq from] RHH_1 domain-containing protein OS=Staphylococcus sp. NAM3COL9 OX=1667172 GN=ACA31_00310 PE=4 SV=1
query MGSSHHHHHHSSGLVPGSHMDKKETKHLLKIKKEDYPQIFDFLENVPRGTKTAHIREALRRYIEEIGENP
tr|A0A0K0ME10|A0A0K0ME10_9STAP/1-51 -------------------MDKKETKHLLKIKKEDYPQIFDFLENVPRGTKTAHIREALRRYIEEIGENP
tr|A0A0C5BVQ8|A0A0C5BVQ8_STAAU/1-51 -------------------MDKKETKHLLKIKKEDYPQIFDFLENVPRGTKTAHIREALRRYIEEIGENP
tr|A0A0D4ZYK6|A0A0D4ZYK6_STAEP/1-51 -------------------MDKKETKHLLKIKKEDYPQIFDFLENVPRGTKTAHIREALRRYIEEIGENP
tr|A0A0H2XKQ4|A0A0H2XKQ4_STAA3/1-51 -------------------MDKKETKHLLKIKKEDYPQIFDFLENVPRGTKTAHIREALRRYIEEIGENP
tr|A0A0N9NJL4|A0A0N9NJL4_STAPS/1-51 -------------------MDKKETKHLLKIKKEDYPQIFDFLENVPRGTKTAHIREALRRYIEEIGENP
tr|A0A0U2CJ65|A0A0U2CJ65_STAEP/1-51 -------------------MDKKETKHLLKIKKEDYPQIFDFLENVPRGTKTAHIREALRRYIEEIGENP
tr|A0A133QXU6|A0A133QXU6_STASI/1-51 -------------------MDKKETKHLLKIKKEDYPQIFDFLENVPRGTKTAHIREALRRYIEEIGENP
tr|A0A141BHY3|A0A141BHY3_STAXY/1-51 -------------------MDKKETKHLLKIKKEDYPQIFDFLENVPRGTKTAHIREALRRYIEEIGENP
tr|A0A141HMK9|A0A141HMK9_STAA8/1-51 -------------------MDKKETKHLLKIKKEDYPQIFDFLENVPRGTKTAHIREALRRYIEEIGENP
tr|A0A1B1UXS0|A0A1B1UXS0_STALU/1-51 -------------------MDKKETKHLLKIKKEDYPQIFDFLENVPRGTKTAHIREALRRYIEEIGENP
tr|A0A1S7BGJ1|A0A1S7BGJ1_STAAU/1-51 -------------------MDKKETKHLLKIKKEDYPQIFDFLENVPRGTKTAHIREALRRYIEEIGENP
tr|A0A418HED5|A0A418HED5_STAGA/1-51 -------------------MDKKETKHLLKIKKEDYPQIFDFLENVPRGTKTAHIREALRRYIEEIGENP
tr|A0A507SJ94|A0A507SJ94_9STAP/1-51 -------------------MDKKETKHLLKIKKEDYPQIFDFLENVPRGTKTAHIREALRRYIEEIGENP
tr|A0A6N0I4W4|A0A6N0I4W4_STAHO/1-51 -------------------MDKKETKHLLKIKKEDYPQIFDFLENVPRGTKTAHIREALRRYIEEIGENP
tr|A0A7G3T6L6|A0A7G3T6L6_9STAP/1-51 -------------------MDKKETKHLLKIKKEDYPQIFDFLENVPRGTKTAHIREALRRYIEEIGENP
tr|A0A848F022|A0A848F022_STACP/1-51 -------------------MDKKETKHLLKIKKEDYPQIFDFLENVPRGTKTAHIREALRRYIEEIGENP
tr|O87365|O87365_STAAU/1-51 -------------------MDKKETKHLLKIKKEDYPQIFDFLENVPRGTKTAHIREALRRYIEEIGENP
tr|A0A7G3L2E1|A0A7G3L2E1_STAAU/1-51 -------------------MDKKETKHLLKIKKEDYPQIFDFLENVPRGTKTAHIREALRRYIEEIGENP
tr|E4PYH1|E4PYH1_STAAU/1-39 -------------------MDKKETKHLLKIKKEDYPQIFDFLENVPRGTKTAHIREA------------
tr|A0A0Q9XW80|A0A0Q9XW80_9STAP/1-51 -------------------MSKQETNHLLKIKKKDYPQIFEFLEGVPKGTKTAHIREALLRYIEELGAPP
#=GC RF xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
//
# STOCKHOLM 1.0
#=GS UniRef90_A0A141BHY3/1-51 DE [subseq from] DNA-binding protein n=37 Tax=Staphylococcaceae TaxID=90964 RepID=A0A141BHY3_STAXY
#=GS UniRef90_UPI000A061283/1-51 DE [subseq from] plasmid segregation protein ParR n=1 Tax=Mammaliicoccus sciuri TaxID=1296 RepID=UPI000A061283
#=GS UniRef90_UPI001E649B27/1-51 DE [subseq from] plasmid segregation protein ParR n=1 Tax=Mammaliicoccus sciuri TaxID=1296 RepID=UPI001E649B27
#=GS UniRef90_UPI00201A2D50/1-51 DE [subseq from] plasmid segregation protein ParR n=1 Tax=Staphylococcus aureus TaxID=1280 RepID=UPI00201A2D50
#=GS UniRef90_UPI0018EDBA69/1-51 DE [subseq from] plasmid segregation protein ParR n=1 Tax=Staphylococcus aureus TaxID=1280 RepID=UPI0018EDBA69
#=GS UniRef90_UPI0005E12F5A/1-51 DE [subseq from] plasmid segregation protein ParR n=1 Tax=Staphylococcus TaxID=1279 RepID=UPI0005E12F5A
#=GS UniRef90_UPI00207B21F3/1-51 DE [subseq from] plasmid segregation protein ParR n=2 Tax=Staphylococcus TaxID=1279 RepID=UPI00207B21F3
#=GS UniRef90_UPI0009836679/1-51 DE [subseq from] plasmid segregation protein ParR n=2 Tax=Staphylococcus aureus TaxID=1280 RepID=UPI0009836679
#=GS UniRef90_UPI001F5439CD/1-51 DE [subseq from] plasmid segregation protein ParR n=11 Tax=Staphylococcaceae TaxID=90964 RepID=UPI001F5439CD
#=GS UniRef90_UPI000DA9B884/1-51 DE [subseq from] plasmid segregation protein ParR n=3 Tax=Bacillales TaxID=1385 RepID=UPI000DA9B884
#=GS UniRef90_A0A0Q9XW80/1-51 DE [subseq from] RHH_1 domain-containing protein n=1 Tax=Staphylococcus sp. NAM3COL9 TaxID=1667172 RepID=A0A0Q9XW80_9STAP
#=GS UniRef90_UPI001CCC4088/3-48 DE [subseq from] plasmid segregation protein ParR n=1 Tax=Macrococcus armenti TaxID=2875764 RepID=UPI001CCC4088
#=GS UniRef90_UPI0014612D4C/1-49 DE [subseq from] De novo designed WSHC6 n=2 Tax=synthetic construct TaxID=32630 RepID=UPI0014612D4C
#=GS UniRef90_UPI000B802FE5/1-42 DE [subseq from] HEEH_rd4_0097 n=1 Tax=Escherichia coli TaxID=562 RepID=UPI000B802FE5
#=GS UniRef90_UPI001E281CEB/1-54 DE [subseq from] Network hallucinated protein 0738_mod n=1 Tax=synthetic construct TaxID=32630 RepID=UPI001E281CEB
query MGSSHHHHHHSSGLVP-GSHMDKKE-TKHLLKIKKEDYPQIFDFLENVPRGTKTAHIREALRRYIEEIGENP
UniRef90_A0A141BHY3/1-51 --------------------MDKKE-TKHLLKIKKEDYPQIFDFLENVPRGTKTAHIREALRRYIEEIGENP
UniRef90_UPI000A061283/1-51 --------------------MDKKE-TKHLLKIKKEDYPQIFDFLENVPRGTKTAHIREALRRYIEEMGENP
UniRef90_UPI001E649B27/1-51 --------------------MDKKE-TKHLLKIKKEDYPQIFDFLENVPRGTKTAHIREALRRYIEEMGDNP
UniRef90_UPI00201A2D50/1-51 --------------------MEKKE-TKHLLKIKKEDYPQIFDFLENVPRGTKTAHIREALRRYIEEMGDNP
UniRef90_UPI0018EDBA69/1-51 --------------------MDKKE-TKHLLKIKKEDYPQIFDFLENVPRGTKTAHIREALLRYIEEFGENP
UniRef90_UPI0005E12F5A/1-51 --------------------MKKKE-TQHLLKIKKEDYPQIFDFLEGLPRGTKTAHIREALLRYIADEGENP
UniRef90_UPI00207B21F3/1-51 --------------------MSKQE-TNHLLKIKKEDYPQIFDFLENVPKGTKTAHIREALIRYINDLGGSP
UniRef90_UPI0009836679/1-51 --------------------MDKKE-TQHLLKIKKQDYPQIFNFLEGLPKGTKTAHIREALMRYIAEEGQNP
UniRef90_UPI001F5439CD/1-51 --------------------MSKQE-TNHLLKIKKKDYPQIFDFLENVPKGTKTAHIREALIRYINDLGGTP
UniRef90_UPI000DA9B884/1-51 --------------------MDKKE-TQHLLKIKKQDYPQIFNFLEGLPKGTKTAHIREALMRYIAEEGNTP
UniRef90_A0A0Q9XW80/1-51 --------------------MSKQE-TNHLLKIKKKDYPQIFEFLEGVPKGTKTAHIREALLRYIEELGAPP
UniRef90_UPI001CCC4088/3-48 ----------------------KEV-NQTLLKIDKAEYPEIYDFLENVPRGTKTAHIREALIRYINDIN---
UniRef90_UPI0014612D4C/1-49 MGSSHHHHHHSSGLVPRGSHMTEDE-IRKLRKLLEEAEKKLYKLEDKTRR----------------------
UniRef90_UPI000B802FE5/1-42 MGSSHHHHHHSSGLVPRGSHMDVEEQIRRLEEVLKKNQPVTW------------------------------
UniRef90_UPI001E281CEB/1-54 MGSSHHHHHHSSGLVPRGSHMNIQV-SLQWE---DPKKGKVFSHTVNIPPGGTAEQIA--------------
#=GC RF xxxxxxxxxxxxxxxx.xxxxxxxx.xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
//
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment