- Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathwordcount.py
32 lines (25 loc) · 790 Bytes
/
wordcount.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
fromhtml.parserimportHTMLParser
importsys
class ArticleParser(HTMLParser):
    """Collect the text that appears inside ``<article>`` elements.

    Feed markup through ``feed()``; every text chunk seen while at least one
    ``<article>`` element is open is appended, in document order, to
    ``text``.

    Attributes:
        in_article: True while the parser is inside an ``<article>`` element.
        text: List of the text chunks collected so far.
    """

    def __init__(self):
        super().__init__()
        self.in_article = False
        self.text = []
        # Count of currently open <article> elements. A depth (rather than
        # only a flag) keeps a nested article's end tag from switching off
        # collection for the remainder of the enclosing article.
        self._article_depth = 0

    def handle_starttag(self, tag, _):
        """Enter article mode when an ``<article>`` start tag is seen."""
        if tag == "article":
            self._article_depth += 1
            self.in_article = True

    def handle_endtag(self, tag):
        """Leave article mode once the outermost ``</article>`` is closed."""
        if tag == "article":
            # Guard against a stray </article> pushing the depth negative.
            if self._article_depth > 0:
                self._article_depth -= 1
            self.in_article = self._article_depth > 0

    def handle_data(self, data):
        """Record ``data`` when it occurs inside an ``<article>`` element."""
        if self.in_article:
            self.text.append(data)
if __name__ == "__main__":
    # Print the total number of whitespace-separated words found inside
    # <article> elements across all HTML files named on the command line.
    total = 0
    for filename in sys.argv[1:]:
        with open(filename, "r", encoding="utf-8") as f:
            parser = ArticleParser()
            parser.feed(f.read())
            # feed() may hold back trailing input it cannot yet interpret;
            # close() forces that buffered data through the handlers so it
            # is included in the count.
            parser.close()
        # Join with a space so chunks split by tags are not fused into a
        # single word before splitting on whitespace.
        text = " ".join(parser.text)
        total += len(text.split())
    print(total)