- Notifications
You must be signed in to change notification settings - Fork 164
/
Copy pathtwitter_scraper.py
75 lines (63 loc) · 2.94 KB
/
twitter_scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# This code searches for tweets with a particular keyword and writes certain fields into a CSV file
import csv
import os
import sys

import tweepy
import twitter
# Application credentials for app-only authentication.
# Set TWITTER_API_KEY / TWITTER_API_SECRET in the environment to use your own
# application's key and secret without editing this file; when they are unset
# (or empty) the values below are used, exactly as before.
# NOTE(review): real credentials should not live in source control -- consider
# revoking these and removing the fallbacks.
# This code is using AppAuthHandler, not OAuthHandler to get higher limits, 2.5 times.
auth = tweepy.AppAuthHandler(
    os.environ.get('TWITTER_API_KEY') or 'j2UAZfXuk6iitAjnLjbFcmn0y',
    os.environ.get('TWITTER_API_SECRET')
    or 'Q9X7g4eAhyElO8u5VI183QwRCUF1sXrZs8m9poGt6Q1pmN4cOw',
)

# The two wait_on_rate_limit flags ask tweepy to pause (and say so) when the
# rate limit is hit rather than fail -- see the tweepy 3.x API documentation.
api = tweepy.API(auth, wait_on_rate_limit=True,
                 wait_on_rate_limit_notify=True)

# NOTE(review): a constructed API object is presumably always truthy, so this
# guard likely never fires; confirm how authentication failures surface.
if not api:
    print("Can't Authenticate")
    sys.exit(-1)
def clean(val):
    """Return *val* as text that csv.writer can write as-is.

    Falsy input (None, empty string) becomes "".

    The csv module stringifies any non-string cell with str(), so handing it
    UTF-8 encoded bytes on Python 3 would put the literal "b'...'" repr in
    the file. The value is therefore returned as the interpreter's native
    ``str`` type:

    * a native ``str`` is returned unchanged;
    * Python 3 ``bytes`` are decoded from UTF-8;
    * anything else (Python 2 ``unicode``) is encoded to UTF-8, which is what
      the Python 2 csv module expects.
    """
    if not val:
        return ""
    if isinstance(val, str):
        return val
    if isinstance(val, bytes):
        # Only reachable on Python 3; on Python 2 bytes is str (handled above).
        return val.decode('utf-8')
    return val.encode('utf-8')
# --- Search configuration ---------------------------------------------------
searchQuery = '#techsytalk'  # This is for your hashtag(s), separate by comma
maxTweets = 80000  # Large max nr
tweetsPerQry = 100  # the max the API permits
fName = 'myfile.csv'  # The CSV file where your tweets will be stored

# If results from a specific ID onwards are required, set sinceId to that ID.
# Else default to no lower limit, go as far back as the API allows.
sinceId = None

# If only results below a specific ID are required, set max_id to that ID.
# Else default to no upper limit, start from the most recent tweet matching
# the search query.
max_id = -1

tweetCount = 0

# On Python 3 the csv module expects a text file opened with newline='' (else
# rows can gain an extra "\r" on some platforms); UTF-8 keeps non-ASCII tweet
# text writable regardless of the locale. Python 2's built-in open() accepts
# neither keyword, so it keeps the original plain open(fName, 'w').
if sys.version_info[0] >= 3:
    open_kwargs = {'newline': '', 'encoding': 'utf-8'}
else:
    open_kwargs = {}

# print("Downloading max {0} tweets".format(maxTweets))
# The file is opened exactly once and the writer is bound to the handle owned
# by the with-block, so everything written is flushed and closed on exit.
with open(fName, 'w', **open_kwargs) as csvfile:
    csvwriter = csv.writer(csvfile)
    while tweetCount < maxTweets:
        try:
            # Build the query once; add the ID bounds only when they are set.
            search_kwargs = {'q': searchQuery, 'count': tweetsPerQry}
            if max_id > 0:
                # Page backwards: ask only for tweets older than the last seen.
                search_kwargs['max_id'] = str(max_id - 1)
            if sinceId:
                search_kwargs['since_id'] = sinceId
            new_tweets = api.search(**search_kwargs)

            if not new_tweets:
                print("No more tweets found")
                break

            for tweet in new_tweets:
                csvwriter.writerow([
                    tweet.created_at,
                    clean(tweet.user.screen_name),
                    clean(tweet.text),
                    tweet.user.created_at,
                    tweet.user.followers_count,
                    tweet.user.friends_count,
                    tweet.user.statuses_count,
                    clean(tweet.user.location),
                    tweet.user.geo_enabled,
                    tweet.user.lang,
                    clean(tweet.user.time_zone),
                    tweet.retweet_count,
                ])

            tweetCount += len(new_tweets)
            # print("Downloaded {0} tweets".format(tweetCount))
            max_id = new_tweets[-1].id
        except Exception as e:
            # Just exit if any error. max_id only advances after a successful
            # page, so carrying on would repeat the same failing request
            # forever.
            print("some error : " + str(e))
            break

print("Downloaded {0} tweets, Saved to {1}".format(tweetCount, fName))