- Notifications
You must be signed in to change notification settings - Fork 406
/
Copy pathfix-colorings.py
89 lines (75 loc) · 4.17 KB
/
fix-colorings.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
importargparse
importjson
importre
fromnumpyimportlinspace
frommathimportfloor
def adjust_coloring_for_epiweeks(dataset):
    """
    Give the "epiweek" coloring of an Auspice JSON a categorical rainbow scale.

    If ``dataset['meta']['colorings']`` contains an entry whose key is
    "epiweek" (case sensitive), the dataset is modified in place:

    * any additional colorings with the key "epiweek" are dropped, keeping
      only the first one;
    * that coloring's ``scale`` is set to ``[epiweek, hex colour]`` pairs, one
      per epiweek observed in the tree, with colours spread evenly over the
      full range of weeks between the earliest and latest observed week;
    * that coloring's ``type`` is forced to "categorical".

    Epiweek values are read from ``node_attrs.epiweek.value`` of every node in
    ``dataset['tree']``. Each value must be a six-digit string (a four-digit
    year followed by a two-digit week number; the error message calls this
    "YYYYMM") and is parsed by ``epiweeks.Week.fromstring`` using the CDC
    system.

    The function returns ``None`` in all cases. Nothing beyond the duplicate
    removal is changed when:

    * no coloring has the key "epiweek" (not even duplicates are touched);
    * the tree contains no epiweek values (a message is printed);
    * a value is malformed or rejected by ``epiweeks`` with a ``ValueError``
      (the error is printed and scale creation is skipped).

    The third-party ``epiweeks`` package is imported only once there is at
    least one value to parse, so datasets without epiweek data do not need it.
    """
    EPIKEY = "epiweek"

    # Locate the first coloring keyed "epiweek"; this is the entry we edit.
    target = next(
        ((i, c) for i, c in enumerate(dataset['meta'].get("colorings", [])) if c['key'] == EPIKEY),
        None,
    )
    if target is None:  # coloring doesn't define an epiweek
        return
    cidx, coloring = target

    # remove any duplicate coloring entries in the JSON to ensure the entry we edit is the one used by Auspice
    # (NOTE: this is augur bug https://github.com/nextstrain/augur/issues/719)
    dataset['meta']['colorings'] = [
        c for i, c in enumerate(dataset['meta']['colorings'])
        if not (c['key'] == EPIKEY and i != cidx)
    ]

    # Collect the raw epiweek values with an explicit stack rather than
    # recursion, so very deep trees cannot exceed Python's recursion limit.
    # Children are pushed in reverse so nodes are still visited in pre-order,
    # which keeps "the first invalid value reported" the same as a recursive walk.
    raw_values = []
    pending = [dataset["tree"]]
    while pending:
        node = pending.pop()
        value = node.get("node_attrs", {}).get(EPIKEY, {}).get("value", False)
        if value:
            raw_values.append(value)
        pending.extend(reversed(node.get("children", [])))

    if not raw_values:
        # Without this guard the range construction below would fail with an
        # IndexError on the empty list of observed weeks.
        print("No epiweek values found in the tree.")
        print("Skipping color scale creation for epiweek.")
        return

    # delay import to support older setups not using epiweeks package
    from epiweeks import Week

    # we validate using both the epiweeks package and a regex (epiweeks will perform coercion of non-valid data into valid data)
    epiweek_pattern = re.compile(r'^(\d{4})(\d{2})$')
    observed_weeks = set()
    try:
        for value in raw_values:
            if not epiweek_pattern.match(value):
                raise ValueError(f"Epiweek value {value} was not in format YYYYMM.")
            # Expected to raise ValueError when the week is not valid.
            observed_weeks.add(Week.fromstring(value, system="cdc"))
    except ValueError as e:
        print(str(e))
        print("Skipping color scale creation for epiweek.")
        return

    ## generate epiweeks across the entire observed range for color generation
    first_week, last_week = min(observed_weeks), max(observed_weeks)
    epiweeks = [first_week]
    while epiweeks[-1] < last_week:
        epiweeks.append(epiweeks[-1] + 1)

    ## generate rainbow colour scale across epiweeks.
    ## Since a "default" augur install does not include matplotlib, rather than interpolating between values in the scale
    ## we reuse them. This only applies when n(epiweeks)>30, where distinguishing between colors is problematic anyway.
    rainbow = ["#511EA8", "#482BB6", "#4039C3", "#3F4ACA", "#3E5CD0", "#416CCE", "#447CCD", "#4989C4", "#4E96BC", "#559FB0", "#5DA8A4", "#66AE96", "#6FB388", "#7AB77C", "#85BA6F", "#91BC64", "#9DBE5A", "#AABD53", "#B6BD4B", "#C2BA46", "#CDB642", "#D6B03F", "#DDA83C", "#E29D39", "#E69036", "#E67F33", "#E56D30", "#E2592C", "#DF4428", "#DC2F24"]
    # One palette index per week in the full range (endpoint excluded, so
    # every index stays below len(rainbow)).
    color_indices = [floor(x) for x in linspace(0, len(rainbow), endpoint=False, num=len(epiweeks))]

    # Only weeks that were actually observed get a scale entry, but each keeps
    # the colour of its position in the full range.
    coloring['scale'] = [
        [epiweek.cdcformat(), rainbow[color_indices[i]]]
        for i, epiweek in enumerate(epiweeks)
        if epiweek in observed_weeks
    ]
    ## auspice will order the legend according to the provided color scale, so there is no need to set
    ## `coloring['legend']` unless we want to restrict this for some reason.
    coloring['type'] = 'categorical'  # force the scale type to be categorical
if __name__ == '__main__':
    # Command-line entry point: read an Auspice JSON, drop the colorings that
    # should not be exposed, adjust the epiweek coloring, and write the result.
    parser = argparse.ArgumentParser(
        description="Remove extraneous colorings",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument('--input', type=str, metavar="JSON", required=True, help="input Auspice JSON")
    parser.add_argument('--output', type=str, metavar="JSON", required=True, help="output Auspice JSON")
    args = parser.parse_args()

    # JSON text is UTF-8; be explicit so loading does not depend on the
    # platform's locale encoding.
    with open(args.input, "r", encoding="utf-8") as f:
        input_json = json.load(f)

    # Keep every coloring except those keyed by these identifiers.
    keys_to_remove = ["genbank_accession", "gisaid_epi_isl"]
    input_json["meta"]["colorings"] = [
        coloring
        for coloring in input_json["meta"]["colorings"]
        if coloring['key'] not in keys_to_remove
    ]

    # Modifies input_json in place when an "epiweek" coloring is present.
    adjust_coloring_for_epiweeks(input_json)

    with open(args.output, 'w', encoding="utf-8") as f:
        json.dump(input_json, f, indent=2)