Commit 148afc98 authored by Brian Loyal
Browse files

Update fasta code to account for multi-line seqs

parent 8057eb84
...@@ -309,10 +309,16 @@ def main(args): ...@@ -309,10 +309,16 @@ def main(args):
for fasta_file in os.listdir(args.fasta_dir): for fasta_file in os.listdir(args.fasta_dir):
with open(os.path.join(args.fasta_dir, fasta_file), "r") as fp: with open(os.path.join(args.fasta_dir, fasta_file), "r") as fp:
data = fp.read() data = fp.read()
tags, seqs = parse_fasta(data) tags = []
# assert len(tags) == len(set(tags)), "All FASTA tags must be unique" seqs = []
for prot in data.split(">")[1::]:
lines = prot.strip().split("\n")
tags.append(lines[0].strip().split()[0])
seqs.append("".join(lines[1:]))
assert len(tags) == len(set(tags)), "All FASTA tags must be unique"
tag = '-'.join(tags) tag = '-'.join(tags)
output_name = f'{tag}_{args.config_preset}' output_name = f'{tag}_{args.config_preset}'
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment