Unverified Commit e7eadc44 authored by Dingquan Yu's avatar Dingquan Yu Committed by GitHub
Browse files

Merge pull request #4 from dingquanyu/multimer-dataloader

created Multimer dataloader and datamodule classes
parents 585136e4 dbc0b085
This diff is collapsed.
...@@ -803,7 +803,8 @@ class DataPipeline: ...@@ -803,7 +803,8 @@ class DataPipeline:
def _parse_template_hits( def _parse_template_hits(
self, self,
alignment_dir: str, alignment_dir: str,
alignment_index: Optional[Any] = None alignment_index: Optional[Any] = None,
input_sequence=None,
) -> Mapping[str, Any]: ) -> Mapping[str, Any]:
all_hits = {} all_hits = {}
if (alignment_index is not None): if (alignment_index is not None):
...@@ -830,6 +831,15 @@ class DataPipeline: ...@@ -830,6 +831,15 @@ class DataPipeline:
with open(path, "r") as fp: with open(path, "r") as fp:
hits = parsers.parse_hhr(fp.read()) hits = parsers.parse_hhr(fp.read())
all_hits[f] = hits all_hits[f] = hits
fp.close()
elif (ext =='.sto') and (f.startswith("pdb")):
with open(path,"r") as fp:
hits = parsers.parse_hmmsearch_sto(fp.read(),input_sequence)
all_hits[f] = hits
fp.close()
return all_hits
def _get_msas(self, def _get_msas(self,
alignment_dir: str, alignment_dir: str,
...@@ -937,7 +947,7 @@ class DataPipeline: ...@@ -937,7 +947,7 @@ class DataPipeline:
input_sequence = mmcif.chain_to_seqres[chain_id] input_sequence = mmcif.chain_to_seqres[chain_id]
hits = self._parse_template_hits( hits = self._parse_template_hits(
alignment_dir, alignment_dir,
alignment_index) alignment_index,input_sequence)
template_features = make_template_features( template_features = make_template_features(
input_sequence, input_sequence,
...@@ -986,7 +996,7 @@ class DataPipeline: ...@@ -986,7 +996,7 @@ class DataPipeline:
hits = self._parse_template_hits( hits = self._parse_template_hits(
alignment_dir, alignment_dir,
alignment_index alignment_index,input_sequence
) )
template_features = make_template_features( template_features = make_template_features(
...@@ -1018,7 +1028,7 @@ class DataPipeline: ...@@ -1018,7 +1028,7 @@ class DataPipeline:
hits = self._parse_template_hits( hits = self._parse_template_hits(
alignment_dir, alignment_dir,
alignment_index alignment_index,input_sequence
) )
template_features = make_template_features( template_features = make_template_features(
...@@ -1107,7 +1117,7 @@ class DataPipeline: ...@@ -1107,7 +1117,7 @@ class DataPipeline:
alignment_dir = os.path.join( alignment_dir = os.path.join(
super_alignment_dir, desc super_alignment_dir, desc
) )
hits = self._parse_template_hits(alignment_dir, alignment_index=None) hits = self._parse_template_hits(alignment_dir, alignment_index=None,input_sequence=input_sequence)
template_features = make_template_features( template_features = make_template_features(
seq, seq,
hits, hits,
......
...@@ -134,8 +134,8 @@ class FeaturePipeline: ...@@ -134,8 +134,8 @@ class FeaturePipeline:
mode: str = "train", mode: str = "train",
is_multimer: bool = False, is_multimer: bool = False,
) -> FeatureDict: ) -> FeatureDict:
if(is_multimer and mode != "predict"): # if(is_multimer and mode != "predict"):
raise ValueError("Multimer mode is not currently trainable") # raise ValueError("Multimer mode is not currently trainable")
return np_example_to_features( return np_example_to_features(
np_example=raw_features, np_example=raw_features,
......
...@@ -104,7 +104,7 @@ def ensembled_transform_fns(common_cfg, mode_cfg, ensemble_seed): ...@@ -104,7 +104,7 @@ def ensembled_transform_fns(common_cfg, mode_cfg, ensemble_seed):
# the masked locations and secret corrupted locations. # the masked locations and secret corrupted locations.
transforms.append( transforms.append(
data_transforms.make_masked_msa( data_transforms.make_masked_msa(
common_cfg.masked_msa, mode_cfg.masked_msa_replace_fraction common_cfg.masked_msa, mode_cfg.masked_msa_replace_fraction,seed=msa_seed
) )
) )
......
...@@ -1667,7 +1667,7 @@ def chain_center_of_mass_loss( ...@@ -1667,7 +1667,7 @@ def chain_center_of_mass_loss(
all_atom_positions = all_atom_positions[..., ca_pos, :] all_atom_positions = all_atom_positions[..., ca_pos, :]
all_atom_mask = all_atom_mask[..., ca_pos: (ca_pos + 1)] # keep dim all_atom_mask = all_atom_mask[..., ca_pos: (ca_pos + 1)] # keep dim
chains, _ = asym_id.unique(return_counts=True) chains, _ = asym_id.unique(return_counts=True)
one_hot = torch.nn.functional.one_hot(asym_id.to(torch.int64), one_hot = torch.nn.functional.one_hot(asym_id.to(torch.int64)-1, # have to reduce asym_id by one because class values must be smaller than num_classes
num_classes=chains.shape[0]).to(dtype=all_atom_mask.dtype) # make sure asym_id dtype is int num_classes=chains.shape[0]).to(dtype=all_atom_mask.dtype) # make sure asym_id dtype is int
one_hot = one_hot * all_atom_mask one_hot = one_hot * all_atom_mask
chain_pos_mask = one_hot.transpose(-2, -1) chain_pos_mask = one_hot.transpose(-2, -1)
...@@ -1749,7 +1749,7 @@ def get_optimal_transform( ...@@ -1749,7 +1749,7 @@ def get_optimal_transform(
src_atoms = torch.zeros((1, 3), device=src_atoms.device).float() src_atoms = torch.zeros((1, 3), device=src_atoms.device).float()
tgt_atoms = src_atoms tgt_atoms = src_atoms
else: else:
src_atoms = src_atoms[mask, :] src_atoms = src_atoms.to('cuda:0')[mask, :]
tgt_atoms = tgt_atoms.to('cuda:0')[mask, :] tgt_atoms = tgt_atoms.to('cuda:0')[mask, :]
src_center = src_atoms.mean(-2, keepdim=True) src_center = src_atoms.mean(-2, keepdim=True)
tgt_center = tgt_atoms.mean(-2, keepdim=True) tgt_center = tgt_atoms.mean(-2, keepdim=True)
...@@ -1857,7 +1857,6 @@ def greedy_align( ...@@ -1857,7 +1857,6 @@ def greedy_align(
best_idx = None best_idx = None
cur_asym_list = entity_2_asym_list[int(cur_entity_ids)] cur_asym_list = entity_2_asym_list[int(cur_entity_ids)]
cur_residue_index = per_asym_residue_index[int(cur_asym_id)] cur_residue_index = per_asym_residue_index[int(cur_asym_id)]
cur_pred_pos = pred_ca_pos[asym_mask] cur_pred_pos = pred_ca_pos[asym_mask]
cur_pred_mask = pred_ca_mask[asym_mask] cur_pred_mask = pred_ca_mask[asym_mask]
for next_asym_id in cur_asym_list: for next_asym_id in cur_asym_list:
...@@ -1881,35 +1880,28 @@ def greedy_align( ...@@ -1881,35 +1880,28 @@ def greedy_align(
return align return align
def merge_labels(batch, per_asym_residue_index, labels, align): def merge_labels(per_asym_residue_index, labels, align):
""" """
batch: per_asym_residue_index: A dictionary that record which asym_id corresponds to which regions of residues in the multimer complex.
labels: list of label dicts, each with shape [nk, *] labels: list of original ground truth feats
align: list of int, such as [2, None, 0, 1], each entry specify the corresponding label of the asym. align: list of tuples, each entry specify the corresponding label of the asym.
""" """
num_res = batch["msa_mask"].shape[-1]
outs = {} outs = {}
for k, v in labels[0].items(): for k, v in labels[0].items():
if k in [
"resolution",
]:
continue
cur_out = {} cur_out = {}
for i, j in align: for i, j in align:
label = labels[j][k] label = labels[j][k]
cur_num_res = labels[j]['aatype'].shape[-1]
# to 1-based # to 1-based
cur_residue_index = per_asym_residue_index[i + 1] cur_residue_index = per_asym_residue_index[i + 1]
cur_out[i] = label[cur_residue_index] if len(v.shape)==0 or "template" in k:
continue
else:
dimension_to_merge = label.shape.index(cur_num_res) if cur_num_res in label.shape else 0
cur_out[i] = label.index_select(dimension_to_merge,cur_residue_index)
cur_out = [x[1] for x in sorted(cur_out.items())] cur_out = [x[1] for x in sorted(cur_out.items())]
new_v = torch.concat(cur_out, dim=0) if len(cur_out)>0:
merged_nres = new_v.shape[0] new_v = torch.concat(cur_out, dim=dimension_to_merge)
assert (
merged_nres <= num_res
), f"bad merged num res: {merged_nres} > {num_res}. something is wrong."
if merged_nres < num_res: # must pad
pad_dim = new_v.shape[1:]
pad_v = new_v.new_zeros((num_res - merged_nres, *pad_dim))
new_v = torch.concat((new_v, pad_v), dim=0)
outs[k] = new_v outs[k] = new_v
return outs return outs
...@@ -2050,7 +2042,6 @@ class AlphaFoldMultimerLoss(AlphaFoldLoss): ...@@ -2050,7 +2042,6 @@ class AlphaFoldMultimerLoss(AlphaFoldLoss):
true_ca_masks = [ true_ca_masks = [
l["all_atom_mask"][..., ca_idx].float() for l in labels l["all_atom_mask"][..., ca_idx].float() for l in labels
] # list([nres,]) ] # list([nres,])
unique_asym_ids = torch.unique(batch["asym_id"]) unique_asym_ids = torch.unique(batch["asym_id"])
per_asym_residue_index = {} per_asym_residue_index = {}
...@@ -2059,7 +2050,6 @@ class AlphaFoldMultimerLoss(AlphaFoldLoss): ...@@ -2059,7 +2050,6 @@ class AlphaFoldMultimerLoss(AlphaFoldLoss):
per_asym_residue_index[int(cur_asym_id)] = batch["residue_index"][asym_mask] per_asym_residue_index[int(cur_asym_id)] = batch["residue_index"][asym_mask]
anchor_gt_asym, anchor_pred_asym = get_least_asym_entity_or_longest_length(batch) anchor_gt_asym, anchor_pred_asym = get_least_asym_entity_or_longest_length(batch)
print(f"anchor_gt_asym is : {anchor_gt_asym} and anchor_pred_asym is {anchor_pred_asym}")
anchor_gt_idx = int(anchor_gt_asym) - 1 anchor_gt_idx = int(anchor_gt_asym) - 1
unique_entity_ids = torch.unique(batch["entity_id"]) unique_entity_ids = torch.unique(batch["entity_id"])
...@@ -2100,16 +2090,13 @@ class AlphaFoldMultimerLoss(AlphaFoldLoss): ...@@ -2100,16 +2090,13 @@ class AlphaFoldMultimerLoss(AlphaFoldLoss):
del aligned_true_ca_poses del aligned_true_ca_poses
del r,x del r,x
gc.collect() gc.collect()
print(f"finished multi-chain permutation and final align is {align}")
merged_labels = merge_labels( merged_labels = merge_labels(
batch,
per_asym_residue_index, per_asym_residue_index,
labels, labels,
align, align,
) )
print(f"finished multi-chain permutation and final align is {align}")
return merged_labels return merged_labels
def forward(self,out,batch,_return_breakdown=False): def forward(self,out,batch,_return_breakdown=False):
...@@ -2122,9 +2109,9 @@ class AlphaFoldMultimerLoss(AlphaFoldLoss): ...@@ -2122,9 +2109,9 @@ class AlphaFoldMultimerLoss(AlphaFoldLoss):
batch: a pair of input features and its corresponding ground truth structure batch: a pair of input features and its corresponding ground truth structure
""" """
features,labels = batch features,labels = batch
features['resolution'] = labels[2]['resolution'] # firstly update the resolution feature
# first remove the recycling dimention of input features # first remove the recycling dimention of input features
features = tensor_tree_map(lambda t: t[..., -1], features) features = tensor_tree_map(lambda t: t[..., -1], features)
features['resolution'] = labels[0]['resolution']
# then permutate ground truth chains before calculating the loss # then permutate ground truth chains before calculating the loss
permutated_labels = self.multi_chain_perm_align(out,features,labels) permutated_labels = self.multi_chain_perm_align(out,features,labels)
permutated_labels.pop('aatype') permutated_labels.pop('aatype')
......
This diff is collapsed.
This source diff could not be displayed because it is too large. You can view the blob instead.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
>query
EAYKKAKQASQDAEQAAKDAENASKEAEEAAKEAVNLK
>tr|A0A024VQI0|A0A024VQI0_PLAFA Uncharacterized protein OS=Plasmodium falciparum FCH/4 GN=PFFCH_02010 PE=4 SV=1
EAYKKAKEASQDAEKAAEEAEKAAEQAEQASKDAEKLK
>tr|A0A151L2U9|A0A151L2U9_PLARE Merozoite surface protein 3 (Fragment) OS=Plasmodium reichenowi GN=PRSY57_0014700A PE=4 SV=1
-KAKKASEDAEKVKKASEDaekaakdAENASKEAEEAAQEAVKLK
>tr|O15855|O15855_PLAFA Merozoite surface protein 3 (Fragment) OS=Plasmodium falciparum GN=SPAM PE=4 SV=1
-KAKKASEDAEKAA-----ndAENASKEAEEAAKEAVNLK
>tr|W7JBM4|W7JBM4_PLAFA Uncharacterized protein OS=Plasmodium falciparum UGT5.1 GN=C923_03131 PE=4 SV=1
-KAKKASEDAGNASKEAE---EASCHKQNILY------
>tr|A0A1D8H334|A0A1D8H334_PLAFA Merozoite surface protein-3 (Fragment) OS=Plasmodium falciparum OX=5833 PE=4 SV=1
------KKASEDAEKAANDAENASKEAEEASKEAVNLK
>tr|A0A2P9DFY8|A0A2P9DFY8_PLARE Merozoite surface protein 3 OS=Plasmodium reichenowi OX=5854 GN=PRG01_1033900 PE=4 SV=1
DAYKKAKEAAEGAEKAAEEAEKAAEQAEQASKDAEKLK
>tr|A0A2P9GM08|A0A2P9GM08_9APIC Merozoite surface protein 3 OS=Plasmodium sp. DRC-Itaito OX=720590 GN=PBILCG01_1034200 PE=4 SV=1
ETYKKVKQASQDAEQAAEKAEKAAKDAEQASKDAEKLK
>tr|A0A2S1Q7P9|A0A2S1Q7P9_PLAFA Merozoite surface protein 3 (Fragment) OS=Plasmodium falciparum OX=5833 PE=4 SV=1
EAYKKAKQASQDAEQAAKDAEQAAKDAEQASKDAEKLK
>tr|A0A2P9C2Y3|A0A2P9C2Y3_9APIC Merozoite surface protein 3 OS=Plasmodium gaboni OX=647221 GN=PGABG01_1033300 PE=4 SV=1
---IIVKKEAQKAKEAAQTAEkekdisEN---AKNSAVSAKSSK
>tr|A0A060RU34|A0A060RU34_PLARE Merozoite surface protein 3 OS=Plasmodium reichenowi OX=5854 GN=MSP3 PE=4 SV=1
---LKAKKASEDAEKA---AEdaeNASKEAEEAAQEAVKLK
>tr|W4J2M7|W4J2M7_PLAFP Uncharacterized protein OS=Plasmodium falciparum (isolate Palo Alto / Uganda) OX=57270 GN=PFUGPA_01621 PE=4 SV=1
---LKAKKASEDAGNAS-------KEAEEASWFRVQ--
This diff is collapsed.
# STOCKHOLM 1.0
#=GS MGYP000005532851/48-76 DE [subseq from] PL=00 UP=0 BIOMES=0110000000000
#=GS MGYP000005532851/81-108 DE [subseq from] PL=00 UP=0 BIOMES=0110000000000
#=GS MGYP000005532851/110-139 DE [subseq from] PL=00 UP=0 BIOMES=0110000000000
#=GS MGYP000091056102/48-76 DE [subseq from] PL=01 UP=0 BIOMES=0110000000000
#=GS MGYP000091056102/81-108 DE [subseq from] PL=01 UP=0 BIOMES=0110000000000
#=GS MGYP000091056102/110-139 DE [subseq from] PL=01 UP=0 BIOMES=0110000000000
#=GS MGYP000091056102/174-199 DE [subseq from] PL=01 UP=0 BIOMES=0110000000000
#=GS MGYP000091056102/252-279 DE [subseq from] PL=01 UP=0 BIOMES=0110000000000
query EAYKKAKQASQDAEQAAKDAENASKEAEEAAKEAVNLK
MGYP000005532851/48-76 -----AKQASLDAKQASVDAKHESVVAKQASLEA----
MGYP000005532851/81-108 ---VVAKQASVDAKQASVDAKHESVVAKQAS-------
MGYP000005532851/110-139 ----VAKQASLDAKQASLDAKQASLDAKQASVDA----
MGYP000091056102/48-76 -----AKQASLDAKQASVDAKHESVVAKQASLEA----
MGYP000091056102/81-108 ---VVAKQASVDAKQASVDAKHESVVAKQAS-------
MGYP000091056102/110-139 ----VAKQASLDAKQASLDAKQASLDAKQASVDA----
MGYP000091056102/174-199 -----AKQASLDAKQASVDAKHESVVAKQAS-------
MGYP000091056102/252-279 ------KQASVDAKQASVDAKQASVDAKQASVDA----
#=GC RF xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
//
This diff is collapsed.
# STOCKHOLM 1.0
#=GS UniRef90_Q9NIG6/58-95 DE [subseq from] Merozoite surface protein 3 (Fragment) n=1 Tax=Plasmodium falciparum TaxID=5833 RepID=Q9NIG6_PLAFA
#=GS UniRef90_A0A024VYN0/90-127 DE [subseq from] Merozoite surface protein 3 n=52 Tax=Plasmodium falciparum TaxID=5833 RepID=A0A024VYN0_PLAFA
#=GS UniRef90_UPI0019011389/487-495 DE [subseq from] potassium voltage-gated channel subfamily C member 1-like n=1 Tax=Salvelinus namaycush TaxID=8040 RepID=UPI0019011389
#=GS UniRef90_O15863/57-94 DE [subseq from] Merozoite surface protein 3 (Fragment) n=10 Tax=Plasmodium falciparum TaxID=5833 RepID=O15863_PLAFA
#=GS UniRef90_A0A6J8EZW5/149-173 DE [subseq from] Secreted protein n=1 Tax=Mytilus coruscus TaxID=42192 RepID=A0A6J8EZW5_MYTCO
#=GS UniRef90_A0A8C3V826/99-123 DE [subseq from] Translation initiation factor IF-2 n=1 Tax=Catharus ustulatus TaxID=91951 RepID=A0A8C3V826_CATUS
#=GS UniRef90_A0A8C3V826/170-202 DE [subseq from] Translation initiation factor IF-2 n=1 Tax=Catharus ustulatus TaxID=91951 RepID=A0A8C3V826_CATUS
#=GS UniRef90_O15854/44-77 DE [subseq from] Merozoite surface protein 3 (Fragment) n=2 Tax=Plasmodium falciparum TaxID=5833 RepID=O15854_PLAFA
#=GS UniRef90_A0A401U021/46-73 DE [subseq from] CG2 omega repeat (Fragment) n=1 Tax=Chiloscyllium punctatum TaxID=137246 RepID=A0A401U021_CHIPU
#=GS UniRef90_UPI001F20CBA7/9-40 DE [subseq from] hypothetical protein n=1 Tax=Niallia circulans TaxID=1397 RepID=UPI001F20CBA7
#=GS UniRef90_UPI001F20CBA7/36-61 DE [subseq from] hypothetical protein n=1 Tax=Niallia circulans TaxID=1397 RepID=UPI001F20CBA7
#=GS UniRef90_UPI001F20CBA7/64-88 DE [subseq from] hypothetical protein n=1 Tax=Niallia circulans TaxID=1397 RepID=UPI001F20CBA7
#=GS UniRef90_A0A1D8H334/1-33 DE [subseq from] Merozoite surface protein-3 (Fragment) n=1 Tax=Plasmodium falciparum TaxID=5833 RepID=A0A1D8H334_PLAFA
#=GS UniRef90_A0A3M0JR79/4-33 DE [subseq from] Protein Wnt n=22 Tax=Neognathae TaxID=8825 RepID=A0A3M0JR79_HIRRU
#=GS UniRef90_A0A3M0JR79/40-67 DE [subseq from] Protein Wnt n=22 Tax=Neognathae TaxID=8825 RepID=A0A3M0JR79_HIRRU
#=GS UniRef90_A0A3M0JR79/67-98 DE [subseq from] Protein Wnt n=22 Tax=Neognathae TaxID=8825 RepID=A0A3M0JR79_HIRRU
#=GS UniRef90_A0A3M0JR79/95-124 DE [subseq from] Protein Wnt n=22 Tax=Neognathae TaxID=8825 RepID=A0A3M0JR79_HIRRU
#=GS UniRef90_A0A159SN41/45-78 DE [subseq from] Merozoite surface protein 3 (Fragment) n=2 Tax=Plasmodium falciparum TaxID=5833 RepID=A0A159SN41_PLAFA
#=GS UniRef90_A0A159SLY0/58-95 DE [subseq from] Merozoite surface protein 3 (Fragment) n=2 Tax=Plasmodium falciparum TaxID=5833 RepID=A0A159SLY0_PLAFA
query EAYKKAKQASQDAEQAAKDAENASKEAEEAAKEAVNLK
UniRef90_Q9NIG6/58-95 EAYKKAKQASQDAEQAAKDAENASKEAEEAAKEAVNLK
UniRef90_A0A024VYN0/90-127 EAYKKAKQASQDAEQAAKDAENASKEAEEAAKEAVNLK
UniRef90_UPI0019011389/487-495 ------------------------KDARQVIKD-----
UniRef90_O15863/57-94 EAYKKAKQASQDAEQAAKDAEQAAKDAEQASKDAEKLK
UniRef90_A0A6J8EZW5/149-173 -------QACKDLGQACKDLEQACKDIGQACK------
UniRef90_A0A8C3V826/99-123 -AVRDAPNAVRDAPNAVRDAPNAVRD------------
UniRef90_A0A8C3V826/170-202 --VRDAPKAVRDAPNAVRDAPNAVRDAPNAVRDAP---
UniRef90_O15854/44-77 ----KAKKASEDAEKAANDAENASKEAEEAAKEAVNLK
UniRef90_A0A401U021/46-73 --------GNKDSDNGTKDSDNGTKDSDNGTKESDN--
UniRef90_UPI001F20CBA7/9-40 --RKDVEMARKDVEVARKDVEVARKDVEVASKDA----
UniRef90_UPI001F20CBA7/36-61 --------ASKDAEVASKDAEVASKDAEVASKDV----
UniRef90_UPI001F20CBA7/64-88 --------ASKDVEVASKDVEVASKDVEVARKD-----
UniRef90_A0A1D8H334/1-33 -----AKKASEDAEKAANDAENASKEAEEASKEAVNLK
UniRef90_A0A3M0JR79/4-33 ---RSAEQISEDAEQISEDAEQISKVAEQISEV-----
UniRef90_A0A3M0JR79/40-67 ----DAEQISEDAEQISEDAEQISKVAEQISK------
UniRef90_A0A3M0JR79/67-98 ---KVAEQISEDAEQISEDAEQISKDAEQISKDAE---
UniRef90_A0A3M0JR79/95-124 ---KDAEQISEDAGQISEDAEQISEDAEQISSL-----
UniRef90_A0A159SN41/45-78 ----KAKKASEDAEKAANDAENASKEAEEAAKEAVNLK
UniRef90_A0A159SLY0/58-95 EAYKKAKQASQDAEQAAKDAEQAAKDAEQASKDAEKLK
#=GC RF xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
//
This diff is collapsed.
This diff is collapsed.
# STOCKHOLM 1.0
#=GS MGYP000613015979/52-138 DE [subseq from] PL=11 UP=0 BIOMES=0000000011000
#=GS MGYP000356782790/15-100 DE [subseq from] PL=01 UP=0 BIOMES=1000000000000
#=GS MGYP000745946800/67-150 DE [subseq from] PL=00 UP=0 BIOMES=0000000011000
#=GS MGYP000365627167/34-120 DE [subseq from] PL=01 UP=0 BIOMES=1000000000000
#=GS MGYP000326981611/4-88 DE [subseq from] PL=00 UP=0 BIOMES=0000000011000
#=GS MGYP000438079158/44-95 DE [subseq from] PL=10 UP=0 BIOMES=0000000010100
#=GS MGYP000660499351/22-69 DE [subseq from] PL=00 UP=0 BIOMES=1000000000000
#=GS MGYP000054740637/2-50 DE [subseq from] PL=00 UP=0 BIOMES=0000000010100
#=GS MGYP000111717601/1-44 DE [subseq from] PL=00 UP=0 BIOMES=0000000011000
#=GS MGYP000888755554/1-44 DE [subseq from] PL=00 UP=0 BIOMES=0000000011000
#=GS MGYP000294730968/1-44 DE [subseq from] PL=00 UP=0 BIOMES=0000000010100
#=GS MGYP000492638681/46-75 DE [subseq from] PL=00 UP=0 BIOMES=0000000000001
#=GS MGYP001120896613/151-179 DE [subseq from] PL=01 UP=0 BIOMES=0101000000000
#=GS MGYP000299282490/49-75 DE [subseq from] PL=00 UP=0 BIOMES=0000101000000
#=GS MGYP000665431927/41-61 DE [subseq from] PL=10 UP=0 BIOMES=0110000000000
#=GS MGYP000236921234/89-111 DE [subseq from] PL=01 UP=0 BIOMES=0110000000000
#=GS MGYP000139113544/297-357 DE [subseq from] PL=01 UP=0 BIOMES=0000000011000
#=GS MGYP000139113544/476-542 DE [subseq from] PL=01 UP=0 BIOMES=0000000011000
query GSSGSSGMEGPLNLAHQQSRRADRLLAAGKYEEAISCHRKATTYLSEAMKLTESEQAHLSLELQRDSHMKQLLLIQERWKRA-KREERLKAHSGPSSG
MGYP000613015979/52-138 -----KVMEGPLNLAHQQSRRADRLLAAGKYEEAISCHKKAAAYLSEAMKLTQSEQAHLSLELQRDSHMKQLLLIQERWKRA-QREERLKAQQ-----
MGYP000356782790/15-100 ------VMEGPLNLAHQQSRRADRLLAAGKYEEAISCHKKAAAYLSEAMKLTQSEQAHLSLELQRDSHMKQLLLIQERWKRA-QGEARLKAQQ-----
MGYP000745946800/67-150 ------VMEGPLNLAHQQSRRADRLLAAGKYEEAISCHKKAAAYLSEAMKLTQSEQARLSLELQRDSHMKQLLLIQERWKRA-QREERLKA-------
MGYP000365627167/34-120 -----EVMEGPLNLAHQQSRRADRLLAAGKYEEAISCHKKAAVYLSEAMKLTQSKQAHLSLELQRDSHMKQLLLIQERWKRA-QGEARLKAQQ-----
MGYP000326981611/4-88 -------MEGPLNLAHQQSRRADRLLAAGKYEEAISCHKKAAAYLSEAMKLTQSEQARLSLELQRDSHMKQLLLIQERWKRP-QHEERLKAQQ-----
MGYP000438079158/44-95 -----EVMEGPLNLAHQQSRRADHLLAAGKYEEAISCHKKAAVYLSEAMKLTQSEQV-----------------------------------------
MGYP000660499351/22-69 --------NGSLNLAHQQSRRADRLLAAGKYEEAISCHKKAAAYLYEAMKLTQSEQ------------------------------------------
MGYP000054740637/2-50 -------------------------------------------YLSEAMKLTQSKQAHLSLELQRDSHMKQLLLIQERWKRA-QGEARLKAQQ-----
MGYP000111717601/1-44 ------------------------------------------------MKLTQSEQAHLSLELQRDSHMKQLLLIQERWKRA-QREERLKAQQ-----
MGYP000888755554/1-44 ------------------------------------------------MKLTQSEQARLSLELQRDSHMKQLLLIQERWKRA-QREERLKAQQ-----
MGYP000294730968/1-44 ------------------------------------------------MKLTQSKQAHLSLELQRDSHMKQLLLIQERWKRP-QHEERLKAQQ-----
MGYP000492638681/46-75 ------------NNAKFYSRLAATLSALEKYEEAIDCYQKAI--------------------------------------------------------
MGYP001120896613/151-179 -------------AAHVWNSKGDALANLGKYEEAIECYDKAI--------------------------------------------------------
MGYP000299282490/49-75 ---------------YLLNKKGDNLSRLGKFEEAIECYDKAI--------------------------------------------------------
MGYP000665431927/41-61 -----------------------ILQALGKYEEAIACYDKVITF------------------------------------------------------
MGYP000236921234/89-111 ---------------------GKILEALGKYEEAIACYDKVITF------------------------------------------------------
MGYP000139113544/297-357 ----------------REYQKALKLMTDEKYEEAIKAFEVLNNYSDSATKISECNTAILEREYQKAL--K--LMMDEKYEE-----------------
MGYP000139113544/476-542 -------------------QKATRLMEDEKYEDAIQAFEALNSYSDSTTKISECKTAILEKEYQKA--LK--LMMDEKYEEAIKAFEALN--------
#=GC RF xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx.xxxxxxxxxxxxxxx
//
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment