- Notifications
You must be signed in to change notification settings - Fork 2.9k
/
Copy pathgen_blog_post_html.py
193 lines (158 loc) · 5.75 KB
/
gen_blog_post_html.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
"""Converter from CHANGELOG.md (Markdown) to HTML suitable for a mypy blog post.
How to use:
1. Write release notes in CHANGELOG.md.
2. Make sure the heading for the next release is of form `## Mypy X.Y`.
3. Run `misc/gen_blog_post_html.py X.Y > target.html`.
4. Manually inspect and tweak the result.
Notes:
* There are some fragile assumptions. Double check the output.
"""
importargparse
importhtml
importos
importre
importsys
def format_lists(h: str) -> str:
    """Turn runs of Markdown bullet lines in *h* into HTML ``<ul>`` lists.

    A line counts as a bullet when it starts with ``"- "``, ``"* "`` or
    ``" * "``.  Each maximal run of consecutive bullet lines is wrapped in
    ``<p><ul>`` ... ``</ul>``, and every bullet becomes an ``<li>`` line
    (closing ``</li>`` tags are not emitted).  All other lines are passed
    through unchanged.  The result is joined with ``"\\n"``.
    """
    bullet_prefixes = ("- ", "* ", " * ")
    out = []
    inside_list = False
    for line in h.splitlines():
        is_bullet = line.startswith(bullet_prefixes)
        if is_bullet and not inside_list:
            # First bullet of a new run opens the list.
            out.append("<p><ul>")
        elif inside_list and not is_bullet:
            # First non-bullet line after a run closes the list.
            out.append("</ul>")
        inside_list = is_bullet
        if is_bullet:
            # Drop the two-character marker, then any remaining leading blanks.
            out.append("<li>%s" % line[2:].lstrip())
        else:
            out.append(line)
    if inside_list:
        # Input ended while still inside a list.
        out.append("</ul>")
    return "\n".join(out)
def format_code(h: str) -> str:
    """Convert Markdown code blocks in *h* to ``<pre><code>`` HTML blocks.

    Two kinds of code block are recognized:

    * indented blocks: consecutive lines that start with four spaces; the
      four-space prefix is removed from each line;
    * fenced blocks: lines between a line starting with three backticks and
      the next such line.  Any text after the opening fence is used as the
      ``language-...`` CSS class of the ``<code>`` element.

    Inside a code block, ``&gt;`` / ``&lt;`` entities are first turned back
    into ``>`` / ``<`` and then each line is passed through ``html.escape``,
    so a caller that has already entity-escaped angle brackets (as
    ``convert`` does) does not end up with double-escaped output.

    A fenced block with no closing fence extends to the end of the input.
    """
    a = h.splitlines()
    r = []
    i = 0
    while i < len(a):
        if a[i].startswith(("    ", "```")):
            indent = a[i].startswith("    ")
            language: str = ""
            if not indent:
                # Fenced block: the opening fence may carry a language name.
                language = a[i][3:]
                i += 1
            if language:
                r.append(f'<pre><code class="language-{language}">')
            else:
                r.append("<pre><code>")
            while i < len(a) and (
                a[i].startswith("    ") if indent else not a[i].startswith("```")
            ):
                # Undo &gt; and &lt; so html.escape below does not double-escape.
                line = a[i].replace("&gt;", ">").replace("&lt;", "<")
                if indent:
                    # Undo this extra level of indentation so it looks nice with
                    # syntax highlighting CSS.
                    line = line[4:]
                r.append(html.escape(line))
                i += 1
            r.append("</code></pre>")
            # Skip the closing fence, if there is one (the input may end
            # before the fence is closed).
            if not indent and i < len(a) and a[i].startswith("```"):
                i += 1
        else:
            r.append(a[i])
            i += 1
    formatted = "\n".join(r)
    # remove empty first line for code blocks
    return re.sub(r"<code([^\>]*)>\n", r"<code\1>", formatted)


def convert(src: str) -> str:
    """Convert one release section of CHANGELOG.md (Markdown) to an HTML page.

    The conversion is a fixed sequence of textual substitutions: angle
    brackets are entity-escaped, headings / paragraphs / bullet lists / code
    blocks / inline code / bold / emphasis are translated to HTML tags, issue
    and PR references are turned into links to the mypy GitHub repository,
    and the result is wrapped in an ``<html>`` skeleton that loads
    highlight.js for syntax highlighting.

    The order of the substitutions matters; later steps rely on the output of
    earlier ones.  The approach is heuristic, so the output should be
    inspected manually.
    """
    h = src

    # Replace < and > with entities so changelog text cannot be mistaken for
    # HTML tags.  format_code() undoes this inside code blocks before applying
    # html.escape().
    h = h.replace("<", "&lt;")
    h = h.replace(">", "&gt;")

    # Title
    h = re.sub(r"^## (Mypy [0-9.]+)", r"<h1>\1 Released</h1>", h, flags=re.MULTILINE)

    # Subheadings
    h = re.sub(r"\n### ([A-Z`].*)\n", r"\n<h2>\1</h2>\n", h)

    # Sub-subheadings
    h = re.sub(r"\n\*\*([A-Z_`].*)\*\*\n", r"\n<h3>\1</h3>\n", h)
    h = re.sub(r"\n`\*\*([A-Z_`].*)\*\*\n", r"\n<h3>`\1</h3>\n", h)

    # Translate `**`
    h = re.sub(r"`\*\*`", "<tt>**</tt>", h)

    # Paragraphs
    h = re.sub(r"\n\n([A-Z])", r"\n\n<p>\1", h)

    # Bullet lists
    h = format_lists(h)

    # Code blocks
    h = format_code(h)

    # Code fragments
    h = re.sub(r"``([^`]+)``", r"<tt>\1</tt>", h)
    h = re.sub(r"`([^`]+)`", r"<tt>\1</tt>", h)

    # Remove **** noise
    h = re.sub(r"\*\*\*\*", "", h)

    # Bold text
    h = re.sub(r"\*\*([A-Za-z].*?)\*\*", r" <b>\1</b>", h)

    # Emphasized text
    h = re.sub(r" \*([A-Za-z].*?)\*", r" <i>\1</i>", h)

    # Remove redundant PR links to avoid double links (they will be generated below)
    h = re.sub(r"\[(#[0-9]+)\]\(https://github.com/python/mypy/pull/[0-9]+/?\)", r"\1", h)

    # Issue and PR links
    h = re.sub(r"\((#[0-9]+)\) +\(([^)]+)\)", r"(\2, \1)", h)
    h = re.sub(
        r"fixes #([0-9]+)",
        r'fixes issue <a href="https://github.com/python/mypy/issues/\1">\1</a>',
        h,
    )
    # Note the leading space to avoid stomping on strings that contain #\d in the middle (such as
    # links to PRs in other repos)
    h = re.sub(r" #([0-9]+)", r' PR <a href="https://github.com/python/mypy/pull/\1">\1</a>', h)
    h = re.sub(r"\) \(PR", ", PR", h)

    # Markdown links
    h = re.sub(r"\[([^]]*)\]\(([^)]*)\)", r'<a href="\2">\1</a>', h)

    # Add random links in case they are missing
    h = re.sub(
        r"contributors to typeshed:",
        'contributors to <a href="https://github.com/python/typeshed">typeshed</a>:',
        h,
    )

    # Add top-level HTML tags and headers for syntax highlighting css/js.
    # We're configuring hljs to highlight python and bash code. We can remove
    # this configure call to make it try all the languages it supports.
    h = f"""<html>
<meta charset="utf-8" />
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/styles/a11y-light.min.css">
<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/highlight.min.js"></script>
<script>hljs.configure({{languages:["python","bash"]}});hljs.highlightAll();</script>
<body>
{h}
</body>
</html>"""

    return h
def extract_version(src: str, version: str) -> str:
    """Return the section of changelog text *src* for release *version*.

    The section starts at the line that, after stripping surrounding
    whitespace, equals ``## Mypy <version>`` and runs up to (but not
    including) the next line that starts with ``"## "``.  If there is no such
    following line, the section extends to the end of *src*.

    Raises:
        RuntimeError: if no heading for *version* is found.
    """
    a = src.splitlines()
    heading = f"## Mypy {version}"
    for i, line in enumerate(a):
        if line.strip() == heading:
            break
    else:
        raise RuntimeError(f"Can't find heading {heading!r}")
    j = i + 1
    # Bounds check: the requested release may be the last section in the file.
    while j < len(a) and not a[j].startswith("## "):
        j += 1
    return "\n".join(a[i:j])
def main() -> None:
    """Command-line entry point: write the HTML blog post for a release to stdout.

    Takes one positional argument, the mypy version (``X.Y`` or ``X.Y.Z``),
    reads ``CHANGELOG.md`` from the parent of this script's directory,
    extracts the section for that version, converts it to HTML and writes the
    result to standard output.  Exits with an error message if the version
    argument is malformed.
    """
    parser = argparse.ArgumentParser(
        description="Generate HTML release blog post based on CHANGELOG.md and write to stdout."
    )
    parser.add_argument("version", help="mypy version, in form X.Y or X.Y.Z")
    args = parser.parse_args()
    version: str = args.version
    if not re.match(r"[0-9]+(\.[0-9]+)+$", version):
        sys.exit(f"error: Version must be of form X.Y or X.Y.Z, not {version!r}")
    changelog_path = os.path.join(os.path.dirname(__file__), os.path.pardir, "CHANGELOG.md")
    # Read with an explicit encoding (the generated page declares UTF-8) and
    # close the file deterministically.
    with open(changelog_path, encoding="utf-8") as changelog:
        src = changelog.read()
    src = extract_version(src, version)
    dst = convert(src)
    sys.stdout.write(dst)


if __name__ == "__main__":
    main()