- Notifications
You must be signed in to change notification settings - Fork 406
/
Copy pathrename_clades.py
39 lines (32 loc) · 1.5 KB
/
rename_clades.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import argparse

import yaml
def load_name_mapping(path):
    """Load the clade-name mapping from a YAML file.

    Args:
        path: Path to a YAML file holding a mapping from clade names to
            their replacement names, or a falsy value (``None`` / empty
            string) when no mapping file was given on the command line.

    Returns:
        The mapping as a dict. Empty when ``path`` is falsy or when the
        file contains no YAML document.

    Raises:
        ValueError: If the YAML document is not a mapping.
    """
    if not path:
        return {}

    with open(path) as fh:
        # NOTE(review): FullLoader is kept from the original script. A flat
        # name -> name mapping only needs yaml.safe_load; consider switching
        # if the mapping file can come from an untrusted source.
        mapping = yaml.load(fh, Loader=yaml.FullLoader)

    # An empty YAML file loads as None; treat that as "no renames" instead
    # of failing later on `None.get`.
    if mapping is None:
        return {}
    if not isinstance(mapping, dict):
        raise ValueError(
            "name mapping in %r must be a YAML mapping, got %s"
            % (path, type(mapping).__name__)
        )
    return mapping


def rename_clade_line(line, name_mapping):
    """Apply the name mapping to one tab-separated line of a clade file.

    The first field is always looked up in ``name_mapping``. When the second
    field is the literal ``clade`` the third field is a reference to another
    clade and is looked up as well. Names absent from the mapping are kept.

    Args:
        line: One raw line from a clade file, with or without its newline.
        name_mapping: Dict mapping existing clade names to new names.

    Returns:
        The rewritten line terminated by a newline, or ``None`` when the
        line has fewer than three tab-separated fields and is to be skipped.
    """
    fields = line.strip('\n').split('\t')
    if len(fields) < 3:
        return None

    fields[0] = name_mapping.get(fields[0], fields[0])
    # if clade definition is based on other clade, replace name
    if fields[1] == 'clade':
        fields[2] = name_mapping.get(fields[2], fields[2])
    return '\t'.join(fields) + '\n'


if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description="Rename clades in clades.tsv",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument('--input-clade-files', type=str, nargs='+', required=True, help="input clade files")
    parser.add_argument('--name-mapping', type=str, required=False, help="YAML mapping between Nextstrain clades and display names")
    parser.add_argument('--output-clades', type=str, required=True, help="renamed clade file")
    args = parser.parse_args()

    # read name mapping from input yaml file (empty mapping when not given)
    name_mapping = load_name_mapping(args.name_mapping)

    # write output into one consolidated file; the context manager makes sure
    # the file is flushed and closed even if reading an input file fails
    with open(args.output_clades, "w") as out_clades:
        # loop over input files and replace clade names where appropriate,
        # line by line
        for fname in args.input_clade_files:
            with open(fname) as fh:
                for line in fh:
                    renamed = rename_clade_line(line, name_mapping)
                    if renamed is not None:
                        out_clades.write(renamed)