- Notifications
You must be signed in to change notification settings - Fork 406
/
Copy pathadd_priorities_to_meta.py
45 lines (35 loc) · 1.71 KB
/
add_priorities_to_meta.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
"""
Add column to metadata with the priorities of 'context' sequences
relative to the 'focal' samples
"""
import argparse
import csv
import json

import pandas as pd
# Column index at which each priorities column is inserted into the metadata.
_PRIORITY_COLUMN_INDEX = 11


def _region_from_filename(priority_file):
    """Return the region label encoded in a priorities file name.

    The label is the third ``_``-separated field of the path once the
    ``.tsv`` suffix has been removed (the whole path string is split, not
    just the base name).

    Raises:
        ValueError: if the path has fewer than three ``_``-separated fields.
    """
    fields = priority_file.replace(".tsv", "").split("_")
    if len(fields) < 3:
        raise ValueError(
            "cannot derive region from priorities file name %r: expected at "
            "least three '_'-separated fields" % priority_file
        )
    return fields[2]


def _read_priorities(priority_file):
    """Read a tab-separated priorities file into a ``{strain: priority}`` dict.

    Rows with fewer than two fields are skipped. Priorities are kept as the
    strings found in the file; if a strain occurs more than once the last
    row wins.
    """
    # newline="" is what the csv module asks for when handed a file object.
    with open(priority_file, "r", newline="") as handle:
        rows = csv.reader(handle, delimiter="\t")
        return {row[0]: row[1] for row in rows if len(row) > 1}


def main(argv=None):
    """Add one ``priorities_<region>`` column per priorities file to the metadata.

    Args:
        argv: command-line arguments to parse; ``None`` means ``sys.argv[1:]``.

    The adjusted metadata is always written to ``--output-meta``. When both
    ``--config`` and ``--output-config`` are given, the config JSON is loaded,
    a ``continuous`` coloring entry is appended for every new column, and the
    result is written to ``--output-config``. ``--output-config`` without
    ``--config`` is rejected as a usage error.
    """
    parser = argparse.ArgumentParser(
        description="Add columns for priorities of sequences relative to diff focal regions",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument("--metadata", type=str, required=True, help="metadata")
    parser.add_argument("--priorities", type=str, nargs="+", required=True, help="priorities files")
    parser.add_argument("--config", type=str, help="config file to modify")
    parser.add_argument("--output-meta", type=str, required=True, help="adjusted metadata")
    parser.add_argument("--output-config", type=str, help="modified config")
    args = parser.parse_args(argv)

    # Both config options are optional; fail early instead of crashing on
    # open(None) after the metadata has already been processed.
    if args.output_config and not args.config:
        parser.error("--output-config requires --config")

    metadata = pd.read_csv(args.metadata, sep='\t')

    # Only touch the config when there is both something to read and
    # somewhere to write the result.
    config = None
    if args.config and args.output_config:
        with open(args.config) as fh:
            config = json.load(fh)

    for priority_file in args.priorities:
        column_name = "priorities_" + _region_from_filename(priority_file)
        priors = _read_priorities(priority_file)

        # Strains absent from the priorities file get an empty string.
        column_values = [priors.get(strain, "") for strain in metadata["strain"]]

        # DataFrame.insert requires loc <= number of columns; clamp so that
        # metadata with fewer columns gets the new column appended instead
        # of raising IndexError.
        position = min(_PRIORITY_COLUMN_INDEX, len(metadata.columns))
        metadata.insert(position, column_name, column_values)

        if config is not None:
            # Tolerate a config that has no 'colorings' list yet.
            config.setdefault('colorings', []).append(
                {'key': column_name, 'type': 'continuous'}
            )

    metadata.to_csv(args.output_meta, index=False, sep="\t")

    if config is not None:
        with open(args.output_config, 'w') as fh:
            json.dump(config, fh, indent=2)


if __name__ == '__main__':
    main()