- Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathproblem_scraper.py
59 lines (41 loc) · 1.59 KB
/
problem_scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import json
import os
import time

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import StaleElementReferenceException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
# Directory that receives one "<problem number>.json" file per scraped problem.
PROBLEMS_DIR = './problems'
# Directories scanned for solution files; later entries win on duplicate numbers.
SOLUTION_DIRS = ['./solutions/easy', './solutions/medium', './solutions/hard']
# JSON file mapping problem number -> problem record (must contain a 'link' key).
LINKS_FILE = 'links.json'
# Page opened once before the per-problem pages are visited.
PROBLEMSET_URL = 'https://leetcode.com/problemset/algorithms/'
# Location of the element whose innerHTML is stored as the problem statement.
STATEMENT_XPATH = (
    '//*[@id="app"]/div/div[2]/div/div/div[1]/div/div[1]/div[1]'
    '/div/div[2]/div/div[2]'
)
# Seconds to sleep between attempts to locate the statement element.
POLL_INTERVAL_SECONDS = 1


def list_scraped_problems(problems_dir=PROBLEMS_DIR):
    """Return the set of problem numbers that already have a file.

    The problem number is the part of each file name before the first '.'.
    """
    return {name.split('.')[0] for name in os.listdir(problems_dir)}


def index_solutions(solution_dirs=SOLUTION_DIRS):
    """Return a dict mapping problem number -> path of its solution file.

    The problem number is the part of the file name before the first '.'.
    When the same number appears in several directories, the directory
    listed last wins.
    """
    solution_dict = {}
    for directory in solution_dirs:
        for filename in os.listdir(directory):
            solution_dict[filename.split('.')[0]] = directory + '/' + filename
    return solution_dict


def find_unscraped(solution_dict, problem_files):
    """Return the sorted problem numbers that have a solution but no file yet.

    Sorting makes the printed worklist and the processing order
    reproducible between runs.
    """
    return sorted(set(solution_dict).difference(problem_files))


def fetch_statement(driver, link):
    """Open *link* and return the innerHTML of the statement element.

    Polls once every POLL_INTERVAL_SECONDS until the element can be read.
    The wait is unbounded: if the XPath never matches, this never returns.
    NOTE(review): presumably the page renders asynchronously, hence the
    polling -- confirm against the live site.
    """
    driver.get(link)
    while True:
        try:
            element = driver.find_element(By.XPATH, STATEMENT_XPATH)
            return element.get_attribute("innerHTML")
        except (NoSuchElementException, StaleElementReferenceException):
            # Only "not there yet" conditions are retried; anything else
            # (including Ctrl+C) propagates instead of looping forever.
            time.sleep(POLL_INTERVAL_SECONDS)


def write_problem(problem_number, problem, statement, solution_path,
                  problems_dir=PROBLEMS_DIR):
    """Fill in *problem* and dump it to '<problems_dir>/<number>.json'.

    Adds three keys to *problem* in place: 'statement' (the given HTML),
    'language' (text after the last '.' in *solution_path*) and
    'solution' (the full text of the solution file).
    """
    problem['statement'] = statement
    problem['language'] = solution_path.split('.')[-1]
    with open(solution_path, 'r', encoding='utf-8') as solution_file:
        problem['solution'] = solution_file.read()
    target = problems_dir + '/' + problem_number + '.json'
    with open(target, 'w', encoding='utf-8') as json_file:
        json.dump(problem, json_file)


def main():
    """Scrape the statement of every solved problem that has no file yet."""
    problem_files = list_scraped_problems()
    solution_dict = index_solutions()
    diff_files = find_unscraped(solution_dict, problem_files)
    print(diff_files)

    with open(LINKS_FILE, 'r', encoding='utf-8') as json_file:
        links = json.load(json_file)

    driver = webdriver.Chrome()
    try:
        driver.get(PROBLEMSET_URL)
        for problem_number in diff_files:
            problem = links.get(problem_number)
            if problem is None:
                # Stray files in a solutions directory (e.g. hidden files,
                # whose "number" is the empty string) have no link entry;
                # skip them rather than abort the whole run.
                print('No entry in ' + LINKS_FILE + ' for ' +
                      repr(problem_number) + '; skipping')
                continue
            statement = fetch_statement(driver, problem['link'])
            write_problem(problem_number, problem, statement,
                          solution_dict[problem_number])
    finally:
        # Always close the browser, even when a problem fails mid-run.
        driver.quit()


if __name__ == '__main__':
    main()