- Notifications
You must be signed in to change notification settings - Fork 28.8k
/
Copy pathextract_warnings.py
134 lines (111 loc) · 4.58 KB
/
extract_warnings.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
importargparse
importjson
importos
importtime
importzipfile
fromget_ci_error_statisticsimportdownload_artifact, get_artifacts_links
fromtransformersimportlogging
# Module-level logger from the `transformers` logging utilities; used below to report artifacts that cannot be read.
logger=logging.get_logger(__name__)
def extract_warnings_from_single_artifact(artifact_path, targets, from_gh=None):
    """Extract the selected warnings from a single downloaded artifact.

    Only a file named exactly ``warnings.txt`` inside the artifact is read. In
    that file, consecutive lines that start with a space form the body of one
    warning; any other line ends the current warning.

    Args:
        artifact_path: Path to the artifact. A ``.zip`` archive by default, or a
            directory of already-extracted files when ``from_gh`` is true.
        targets: Collection of warning category names (e.g. ``"UserWarning"``).
            A warning is kept when its text contains ``": <name>: "`` for at
            least one of them.
        from_gh: Whether ``artifact_path`` is a directory instead of a zip
            archive. When ``None`` (the default), the module-level ``from_gh``
            flag set by the command-line entry point is used, falling back to
            ``False`` when the module was imported and the flag was never set.

    Returns:
        A set of warning strings. Each warning is made of its stripped lines
        joined with newlines. If the zip archive cannot be processed, a message
        is logged and whatever was collected so far is returned.
    """
    if from_gh is None:
        # The flag only exists as a global when this file runs as a script;
        # looking it up lazily avoids a NameError when the module is imported.
        from_gh = globals().get("from_gh", False)

    selected_warnings = set()
    buffer = []

    def flush_buffer():
        """Turn the buffered lines into one warning and keep it if it is targeted."""
        if not buffer:
            return
        warning = "\n".join(buffer)
        # Only keep the warnings specified in `targets`
        if any(f": {x}: " in warning for x in targets):
            selected_warnings.add(warning)
        buffer.clear()

    def parse_line(fp):
        """Consume every line of `fp`, collecting warnings into `selected_warnings`."""
        for line in fp:
            # Files opened from a zip archive yield bytes.
            if isinstance(line, bytes):
                line = line.decode("UTF-8")
            if "warnings summary (final)" in line:
                continue
            if line.startswith(" "):
                # Still inside the body of a warning.
                buffer.append(line.strip())
            else:
                # We are outside the body of a warning: the previous one is complete.
                flush_buffer()
        # The file may end while a warning is still buffered; do not lose it, and
        # do not let it leak into the next file.
        flush_buffer()

    if from_gh:
        # Entries that are not directories cannot contain a `warnings.txt`.
        if os.path.isdir(artifact_path):
            for filename in os.listdir(artifact_path):
                if filename != "warnings.txt":
                    continue
                file_path = os.path.join(artifact_path, filename)
                if os.path.isdir(file_path):
                    continue
                # Same encoding as the zip branch, independent of the locale.
                with open(file_path, encoding="UTF-8") as fp:
                    parse_line(fp)
    else:
        try:
            with zipfile.ZipFile(artifact_path) as z:
                for filename in z.namelist():
                    # Member names are paths inside the archive, so they are
                    # compared directly rather than checked on the local disk.
                    if filename != "warnings.txt":
                        continue
                    with z.open(filename) as fp:
                        parse_line(fp)
        except Exception:
            # Best effort: one broken artifact must not abort the whole run.
            logger.warning(
                f"{artifact_path} is either an invalid zip file or something else wrong. This file is skipped."
            )

    return selected_warnings
def extract_warnings(artifact_dir, targets):
    """Collect the selected warnings from every artifact found in `artifact_dir`.

    Args:
        artifact_dir: Directory holding the artifacts. When the module-level
            ``from_gh`` flag is set, each sub-directory is treated as one
            artifact; otherwise each entry whose name ends with ``.zip`` is.
        targets: Warning category names, passed through unchanged to
            `extract_warnings_from_single_artifact`.

    Returns:
        The union (a set) of the warnings extracted from all artifacts.
    """
    # `from_gh` only exists as a global when this file runs as a script; default
    # to zip mode instead of raising NameError when the module is imported.
    use_directories = globals().get("from_gh", False)

    selected_warnings = set()
    for entry in os.listdir(artifact_dir):
        path = os.path.join(artifact_dir, entry)
        if use_directories:
            # Plain files (e.g. result files written by an earlier run) are not
            # artifacts and cannot be listed like a directory.
            if not os.path.isdir(path):
                continue
        elif not entry.endswith(".zip"):
            continue
        selected_warnings.update(extract_warnings_from_single_artifact(path, targets))

    return selected_warnings
if __name__ == "__main__":
    # Command-line entry point: optionally download the artifacts of a workflow
    # run, then write the selected warnings to `selected_warnings.json`.

    def list_str(values):
        """Split a comma-separated command-line value into a list of names."""
        # Tolerate spaces around commas and empty items, so that
        # "UserWarning, FutureWarning" yields usable target names.
        return [item.strip() for item in values.split(",") if item.strip()]

    parser = argparse.ArgumentParser()
    # Required parameters
    parser.add_argument("--workflow_run_id", type=str, required=True, help="A GitHub Actions workflow run id.")
    parser.add_argument(
        "--output_dir",
        type=str,
        required=True,
        help="Where to store the downloaded artifacts and other result files.",
    )
    # Optional parameters
    parser.add_argument("--token", default=None, type=str, help="A token that has actions:read permission.")
    parser.add_argument(
        "--targets",
        default="DeprecationWarning,UserWarning,FutureWarning",
        type=list_str,
        help="Comma-separated list of target warning(s) which we want to extract.",
    )
    parser.add_argument(
        "--from_gh",
        action="store_true",
        help="If running from a GitHub action workflow and collecting warnings from its artifacts.",
    )

    args = parser.parse_args()

    # Module-level flag read by the extraction functions above.
    from_gh = args.from_gh

    # With `--from_gh`, the artifacts have to be downloaded beforehand using
    # `actions/download-artifact@v4`, so nothing is fetched here.
    if not from_gh:
        os.makedirs(args.output_dir, exist_ok=True)

        # get download links
        artifacts = get_artifacts_links(args.workflow_run_id, token=args.token)
        with open(os.path.join(args.output_dir, "artifacts.json"), "w", encoding="UTF-8") as fp:
            json.dump(artifacts, fp, ensure_ascii=False, indent=4)

        # download artifacts
        for name, url in artifacts.items():
            print(name)
            print(url)
            print("=" * 80)
            download_artifact(name, url, args.output_dir, args.token)
            # Be gentle to GitHub
            time.sleep(1)

    # extract warnings from artifacts; sorted so the output file is stable
    selected_warnings = sorted(extract_warnings(args.output_dir, args.targets))
    with open(os.path.join(args.output_dir, "selected_warnings.json"), "w", encoding="UTF-8") as fp:
        json.dump(selected_warnings, fp, ensure_ascii=False, indent=4)