- Notifications
You must be signed in to change notification settings - Fork 164
/
Copy pathgrabnews.py
64 lines (55 loc) · 2.21 KB
/
grabnews.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
importjson, requests
importsqlite3
def handle(content):
    """Normalise an empty field value to None.

    Args:
        content: Any value taken from a post record.

    Returns:
        ``content`` unchanged when it is truthy, otherwise None. Every falsy
        value maps to None, so ``''``, ``0``, ``[]`` and ``None`` all come
        back as None.
    """
    # `not content` already covers None, so no separate `is None` test is needed.
    return content or None
def dump(endpoint, toget):
    """Fetch a listing of the r/python subreddit and return it as parsed JSON.

    Args:
        endpoint: Listing name placed in the URL path, e.g. ``'top'`` or ``'hot'``.
        toget: Value sent as the ``limit`` query parameter.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.RequestException: On connection failure, timeout, or an HTTP
            error status.
        ValueError: If the response body is not valid JSON.
    """
    headers = {'User-agent': 'Chrome'}
    url = 'http://www.reddit.com/r/python/' + str(endpoint) + '/.json?limit=' + str(toget)
    # Without a timeout requests waits indefinitely on an unresponsive server.
    response = requests.get(url, headers=headers, timeout=10)
    # Fail here with the HTTP status instead of handing an error payload to
    # callers that expect a listing.
    response.raise_for_status()
    return json.loads(response.text)
def _field(post, key):
    """Return ``post[key]`` stripped, with missing or empty values as None."""
    value = post.get(key)
    if isinstance(value, str):
        value = value.strip()
    # Only empty strings collapse to None; a numeric 0 (zero upvotes) is kept.
    return None if value == '' else value


def _news(data, toget, unique, table='top_news', cursor=None):
    """Insert or update up to ``toget`` posts from a listing into ``table``.

    Args:
        data: Parsed listing; each post is read from
            ``data['data']['children'][n]['data']``.
        toget: Maximum number of posts to store. Fewer are stored when the
            listing contains fewer children.
        unique: Set of post ids; the id of every stored post is added to it.
        table: Destination table name. Defaults to ``'top_news'``.
        cursor: Cursor to execute the statements on. Defaults to the
            module-level cursor ``c``, which must then already be bound.

    Raises:
        ValueError: If ``table`` is not a plain identifier.
        KeyError: If the listing structure or a post's ``id`` is missing.
    """
    # Table names cannot be bound as SQL parameters, so accept only a bare
    # identifier before formatting it into the statements.
    if not table.isidentifier():
        raise ValueError('invalid table name: {!r}'.format(table))
    if cursor is None:
        cursor = c
    update_sql = (
        "UPDATE {} SET ptitle = ?, ptext = ?, pauthor = ?, purl = ?, pups = ? "
        "where pid = ? "
    ).format(table)
    insert_sql = "INSERT INTO {} VALUES (?, ?, ?, ?, ?, ?)".format(table)

    # Slicing (instead of indexing 0..toget-1) tolerates short listings.
    for child in data['data']['children'][:max(toget, 0)]:
        post = child['data']
        content_id = post['id']
        fields = (
            _field(post, 'title'),
            _field(post, 'selftext'),
            _field(post, 'author_fullname'),
            _field(post, 'url'),
            _field(post, 'ups'),
        )
        # Try UPDATE first and INSERT only when no row matched. This also
        # handles rows left in the table by an earlier run, which a fresh
        # `unique` set knows nothing about.
        cursor.execute(update_sql, fields + (content_id,))
        if cursor.rowcount > 0:
            print("Updated")
        else:
            cursor.execute(insert_sql, (content_id,) + fields)
            print("Inserted")
        unique.add(content_id)


def _refresh_table(table, endpoint, toget):
    """Create ``table`` if needed, then fetch ``endpoint`` and store its posts there."""
    # Uses the module-level cursor `c`; it must be bound before this is called.
    c.execute('''CREATE TABLE IF NOT EXISTS {}
                 (pid text PRIMARY KEY, ptitle text, ptext text, pauthor text, purl text, pups int)'''.format(table))
    _news(dump(endpoint, toget), toget, set(), table=table, cursor=c)


def get_top_news(endpoint='top', toget=10):
    """Fetch a listing (default ``'top'``) and store its posts in ``top_news``.

    Args:
        endpoint: Listing name passed to ``dump``.
        toget: Number of posts to request and store.
    """
    _refresh_table('top_news', endpoint, toget)


def get_hot_news(endpoint='hot', toget=10):
    """Fetch a listing (default ``'hot'``) and store its posts in ``hot_news``.

    Args:
        endpoint: Listing name passed to ``dump``.
        toget: Number of posts to request and store.
    """
    _refresh_table('hot_news', endpoint, toget)
def reddit_get():
    """Refresh the ``reddit_news.db`` SQLite database with the top listing.

    Opens the database, runs ``get_top_news()``, commits, and always closes
    the connection. Only the top listing is fetched here; ``get_hot_news`` is
    not called.
    """
    # The fetch helpers execute their SQL through the module-level name `c`,
    # so the cursor has to be bound globally rather than as a local.
    global c
    conn = sqlite3.connect('reddit_news.db')
    try:
        c = conn.cursor()
        get_top_news()
        conn.commit()
    finally:
        # Close even when fetching or storing fails, so the handle is not leaked.
        conn.close()