- Notifications
You must be signed in to change notification settings - Fork 25
/
Copy pathnocomment.py
126 lines (114 loc) · 4.13 KB
/
nocomment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#
# This file is part of the micropython-esp32-ulp project,
# https://github.com/micropython/micropython-esp32-ulp
#
# SPDX-FileCopyrightText: 2018-2023, the micropython-esp32-ulp authors, see AUTHORS file.
# SPDX-License-Identifier: MIT
def remove_comments(s):
    """
    Remove comments of these styles:

    CHASH:       # comment python style, up to: EOL
    CSLASHSLASH: // comment C style, up to: EOL
    CSLASHSTAR:  /* comment C style (single/multi line), up to: */

    Strings can be like 'strings' or "strings".
    Any comment-starting chars within strings are not considered.
    Escaping of (string-end) chars via backslash in strings is considered.
    A newline inside a string does not end the line; it is kept as part of
    the string.

    Also, leading and trailing whitespace is removed (after comment removal).
    Indented lines are re-indented afterwards with a single tab char.

    Line numbers stay as in input file because empty lines are kept.

    s: string with comments (can include newlines)
    returns: list of text lines
    raises: Exception if the state machine reaches an unknown state
    """
    # note: micropython's ure module was not capable enough to process this:
    # missing methods, re modes, recursion limit exceeded, ...
    # simpler hacks also didn't seem powerful enough to address all the
    # corner cases of CSLASHSTAR vs. *STR, so this state machine came to life:
    SRC, CHASH, CSLASHSLASH, CSLASHSTAR, DSTR, SSTR = range(6)  # states

    line = []  # collect chars of one line
    lines = []  # collect result lines

    def finish_line():
        # assemble a line from characters, try to get rid of trailing and
        # most of leading whitespace (keep/put one tab for indented lines).
        nonlocal line
        line = ''.join(line)
        is_indented = line.startswith(' ') or line.startswith('\t')
        line = line.strip()
        if line and is_indented:
            line = '\t' + line
        lines.append(line)
        line = []  # start collecting the next line

    state = SRC
    i = 0
    length = len(s)
    while i < length:
        c = s[i]
        # one char of look-ahead; '\0' is only a sentinel for "no next char"
        # and must never be copied into the output
        cn = s[i + 1] if i + 1 < length else '\0'
        if state == SRC:
            if c == '#':  # starting to-EOL comment
                state = CHASH
                i += 1
            elif c == '/':
                if cn == '/':  # starting to-EOL comment
                    state = CSLASHSLASH
                    i += 2
                elif cn == '*':  # starting a /* comment
                    state = CSLASHSTAR
                    i += 2
                else:  # a lone slash is ordinary source text
                    i += 1
                    line.append(c)
            elif c == '"':
                state = DSTR
                i += 1
                line.append(c)
            elif c == "'":
                state = SSTR
                i += 1
                line.append(c)
            elif c == '\n':
                i += 1
                finish_line()
            else:
                i += 1
                line.append(c)
        elif state == CHASH or state == CSLASHSLASH:
            if c != '\n':  # comment runs until EOL
                i += 1
            else:
                state = SRC
                i += 1
                finish_line()
        elif state == CSLASHSTAR:
            if c == '*' and cn == '/':  # ending a comment */
                state = SRC
                i += 2
            elif c == '\n':
                # keep one output line per input line inside the comment
                i += 1
                finish_line()
            else:
                i += 1
        elif state == DSTR and c == '"' or state == SSTR and c == "'":
            # string end
            state = SRC
            i += 1
            line.append(c)
        elif state == DSTR or state == SSTR:
            i += 1
            line.append(c)
            if c == '\\' and i < length:
                # escaping backslash: copy the next char verbatim without
                # interpreting it (so an escaped quote does not end the
                # string). If the backslash is the very last char of the
                # input there is nothing to copy.
                line.append(s[i])
                i += 1
        else:
            raise Exception("state: %d c: %s cn: %s" % (state, c, cn))
    if line:
        # no final \n triggered processing these chars yet, do it now
        finish_line()
    return lines
if __name__ == '__main__':
    # Command-line use: read the file named by the first argument, strip its
    # comments and write the result to "<filename>.nocomments".
    import sys

    if len(sys.argv) < 2:
        # fail with a readable message instead of an IndexError traceback
        print("usage: nocomment.py <filename>")
        sys.exit(1)
    filename = sys.argv[1]
    with open(filename, "r") as f:
        text = f.read()
    lines = remove_comments(text)
    with open(filename + ".nocomments", "w") as f:
        # one output line per input line, so line numbers stay aligned
        for line in lines:
            f.write(line + '\n')