- Notifications
You must be signed in to change notification settings - Fork 406
/
Copy pathmask-alignment.py
54 lines (48 loc) · 2.11 KB
/
mask-alignment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
"""
Mask initial bases from alignment FASTA
"""
importargparse
fromaugur.ioimportopen_file, read_sequences, write_sequences
importBio
importBio.SeqIO
fromBio.SeqimportSeq
defmask_terminal_gaps(seq):
L=len(seq)
seq_trimmed=seq.lstrip('-')
left_gaps=L-len(seq_trimmed)
seq_trimmed=seq_trimmed.rstrip('-')
right_gaps=L-len(seq_trimmed) -left_gaps
return"N"*left_gaps+seq_trimmed+"N"*right_gaps
if__name__=='__main__':
parser=argparse.ArgumentParser(
description="Mask initial bases from alignment FASTA",
formatter_class=argparse.ArgumentDefaultsHelpFormatter
)
parser.add_argument("--alignment", required=True, help="FASTA file of alignment")
parser.add_argument("--mask-terminal-gaps", action='store_true', help="fill all terminal gaps with N as they likely represent missing data")
parser.add_argument("--mask-from-beginning", type=int, required=True, help="number of bases to mask from start")
parser.add_argument("--mask-from-end", type=int, help="number of bases to mask from end")
parser.add_argument("--mask-sites", nargs='+', type=int, help="list of sites to mask")
parser.add_argument("--output", required=True, help="FASTA file of output alignment")
args=parser.parse_args()
begin_length=0
ifargs.mask_from_beginning:
begin_length=args.mask_from_beginning
end_length=0
ifargs.mask_from_end:
end_length=args.mask_from_end
withopen_file(args.output, 'w') asoutfile:
forrecordinread_sequences(args.alignment):
seq=str(record.seq)
ifargs.mask_terminal_gaps:
seq=mask_terminal_gaps(seq)
start="N"*begin_length
middle=seq[begin_length:-end_length]
end="N"*end_length
seq_list=list(start+middle+end)
ifargs.mask_sites:
forsiteinargs.mask_sites:
ifseq_list[site-1]!='-':
seq_list[site-1] ="N"
record.seq=Seq("".join(seq_list))
write_sequences(record, outfile)