Skip to content

Commit 340b925

Browse files
committed
Include MLR lineage fitness
This include a script to grab live estimate of MLR lineage fitness and apply this as a tip-level coloring in the tree.
1 parent 20f5fc3 commit 340b925

File tree

3 files changed

+96
-0
lines changed

3 files changed

+96
-0
lines changed

scripts/fetch_mlr_lineage_fitness.py

+69
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
fromaugur.ioimportread_metadata
2+
fromaugur.utilsimportwrite_json
3+
importrequests
4+
importjson
5+
importpandasaspd
6+
importargparse
7+
importmath
8+
9+
# Set up argument parser
10+
parser=argparse.ArgumentParser(description="Process metadata and growth advantage data.")
11+
parser.add_argument("--metadata", required=True, help="Path to the metadata file (TSV or compressed .tsv.xz format).")
12+
parser.add_argument("--metadata-id-columns", default=["strain", "name", "Virus name"], nargs="+", help="List of columns to use as identifiers in the metadata file.")
13+
parser.add_argument("--metadata-clade-attribute", default="Nextclade_pango", help="Matched attribute to MLR variants.")
14+
parser.add_argument("--mlr-url", default="https://data.nextstrain.org/files/workflows/forecasts-ncov/gisaid/pango_lineages/global/mlr/latest_results.json", help="URL to fetch the forecasts JSON data.")
15+
parser.add_argument("--output-node-data", required=True, help="Path to save the output JSON node data.")
16+
17+
args=parser.parse_args()
18+
19+
deffetch_growth_advantages(mlr_url):
20+
try:
21+
response=requests.get(mlr_url)
22+
response.raise_for_status() # Raise an exception for HTTP errors
23+
json_data=response.json() # Parse the JSON content
24+
data=json_data["data"]
25+
26+
growth_advantages= {}
27+
forentryindata:
28+
ifall(keyinentryforkeyin ["location", "site", "variant", "value", "ps"]):
29+
ifentry["location"] =="hierarchical"andentry["site"] =="ga"andentry["ps"] =="median":
30+
growth_advantages[entry["variant"]] =entry["value"]
31+
returngrowth_advantages
32+
exceptExceptionase:
33+
print(f"Error fetching the JSON file: {e}")
34+
returnNone
35+
36+
try:
37+
# Fetch the growth advantages
38+
growth_advantages=fetch_growth_advantages(args.mlr_url)
39+
40+
# Load the local metadata file
41+
metadata_file=args.metadata
42+
metadata=read_metadata(
43+
metadata_file,
44+
id_columns=args.metadata_id_columns
45+
)
46+
47+
# Match Nextclade_pango entries to the growth advantage
48+
ifgrowth_advantages:
49+
metadata[args.metadata_clade_attribute] =metadata[args.metadata_clade_attribute].map(growth_advantages)
50+
else:
51+
metadata[args.metadata_clade_attribute] =math.nan
52+
53+
# Output rows with matched data
54+
print(metadata.head()) # Display the first few rows as an example
55+
56+
# Create a node data object with growth advantages
57+
node_data= {}
58+
forindex, recordinmetadata.iterrows():
59+
node_data[index] = {
60+
"mlr_lineage_fitness": record[args.metadata_clade_attribute]
61+
}
62+
63+
# Save node data
64+
write_json({"nodes": node_data}, args.output_node_data)
65+
66+
exceptFileNotFoundErrorase:
67+
print(f"Error reading metadata file: {e}")
68+
exceptExceptionase:
69+
print(f"An unexpected error occurred: {e}")

workflow/snakemake_rules/export_for_nextstrain.smk

+5
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,11 @@ rule auspice_config:
192192
"title": "Mutational Fitness",
193193
"type": "continuous"
194194
},
195+
{
196+
"key": "mlr_lineage_fitness",
197+
"title": "MLR lineage fitness",
198+
"type": "continuous"
199+
},
195200
{
196201
"key": "region",
197202
"title": "Region",

workflow/snakemake_rules/main_workflow.smk

+22
Original file line numberDiff line numberDiff line change
@@ -1170,6 +1170,27 @@ rule mutational_fitness:
11701170
--output {output} 2>&1 | tee {log}
11711171
"""
11721172

1173+
rulemlr_lineage_fitness:
1174+
input:
1175+
metadata="results/{build_name}/metadata_adjusted.tsv.xz",
1176+
output:
1177+
node_data="results/{build_name}/mlr_lineage_fitness.json",
1178+
benchmark:
1179+
"benchmarks/mlr_lineage_fitness_{build_name}.txt",
1180+
conda:
1181+
config["conda_environment"],
1182+
log:
1183+
"logs/mlr_lineage_fitness_{build_name}.txt",
1184+
params:
1185+
metadata_id_columns=config["sanitize_metadata"]["metadata_id_columns"],
1186+
shell:
1187+
r"""
1188+
python3 scripts/fetch_mlr_lineage_fitness.py \
1189+
--metadata {input.metadata} \
1190+
--metadata-id-columns {params.metadata_id_columns:q} \
1191+
--output-node-data {output.node_data} 2>&1 | tee {log}
1192+
"""
1193+
11731194
rulecalculate_epiweeks:
11741195
input:
11751196
metadata="results/{build_name}/metadata_adjusted.tsv.xz",
@@ -1255,6 +1276,7 @@ def _get_node_data_by_wildcards(wildcards):
12551276
rules.traits.output.node_data,
12561277
rules.logistic_growth.output.node_data,
12571278
rules.mutational_fitness.output.node_data,
1279+
rules.mlr_lineage_fitness.output.node_data,
12581280
rules.distances.output.node_data,
12591281
rules.calculate_epiweeks.output.node_data,
12601282
]

0 commit comments

Comments
 (0)
close