- Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlink_scraper.py
43 lines (30 loc) · 1.24 KB
/
link_scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import json
import time

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
# --- LeetCode problem-list scraper -------------------------------------------
# Opens the LeetCode algorithms problem set in Chrome, pauses so the operator
# can adjust the page filter by hand, then walks the problem table row by row
# and writes a {number: {'link', 'name', 'difficulty'}} mapping to links.json.

PROBLEMSET_URL = 'https://leetcode.com/problemset/algorithms/'
# XPath of a single table row; {row} is the row position (XPath is 1-based).
ROW_XPATH = '//*[@id="question-app"]/div/div[2]/div[2]/div[2]/table/tbody[1]/tr[{row}]'
OUTPUT_PATH = 'links.json'

driver = webdriver.Chrome()
links = {}
try:
    driver.get(PROBLEMSET_URL)
    # Block until the operator has changed the filter and pressed Enter.
    input("Change filter to show all problems: ")

    count = 1
    while True:
        row_xpath = ROW_XPATH.format(row=count)
        try:
            link_ele = driver.find_element(By.XPATH, row_xpath + '/td[3]/div/a')
            difficulty_ele = driver.find_element(By.XPATH, row_xpath + '/td[6]/span')
            number_ele = driver.find_element(By.XPATH, row_xpath + '/td[2]')
        except NoSuchElementException:
            # A missing row/cell is how the end of the table is detected.
            # Only this exception ends the loop; any other error (or Ctrl-C)
            # propagates instead of being silently treated as "done".
            break

        number = number_ele.get_attribute('innerHTML')
        links[number] = {
            'link': link_ele.get_attribute('href'),
            'name': link_ele.get_attribute('innerHTML'),
            'difficulty': difficulty_ele.get_attribute('innerHTML'),
        }

        count += 1
        # Progress indicator; at this point count is the index of the next
        # row to be read.
        if count % 50 == 0:
            print(count)
finally:
    # Always shut the browser session down, even if scraping failed.
    driver.quit()

with open(OUTPUT_PATH, 'w', encoding='utf-8') as json_file:
    json.dump(links, json_file)