-
Notifications
You must be signed in to change notification settings - Fork 14
/
Copy pathscraping.py
98 lines (83 loc) · 3.5 KB
/
scraping.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import StaleElementReferenceException, NoSuchElementException
from time import sleep
import os
import csv
import json
# PATH = 'C:\\Users\\Farneet\\Downloads\\chromedriver.exe'
# options = Options()
# options.add_experimental_option('detach', True)
# options.add_argument('--ignore-certificate-errors')
# cService = webdriver.ChromeService(executable_path=PATH)
# driver = webdriver.Chrome(service=cService, options=options)
# driver.get('https://leetcode.com/problemset/all/')
# def extract_problem_data():
# try:
# WebDriverWait(driver, 10).until(
# EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'div[role="rowgroup"]'))
# )
# elements = driver.find_elements(By.CSS_SELECTOR, 'div[role="row"]')
# data = []
# for element in elements[1:]: # Skip the header row
# text = element.text.split()
# if len(text) >= 4: # Ensuring there's enough data to parse
# problem_id = text[0].strip('.')
# problem_name = ' '.join(text[1:-2])
# acceptance_rate = text[-2].strip('%')
# difficulty = text[-1]
# data.append([problem_id, problem_name, acceptance_rate, difficulty])
# return data
# except StaleElementReferenceException:
# return extract_problem_data()
# def click_next_button():
# try:
# next_button = WebDriverWait(driver, 1).until(
# EC.presence_of_element_located((By.CSS_SELECTOR, 'button[aria-label="next"]:not([disabled])'))
# )
# if next_button.is_enabled():
# next_button.click()
# sleep(2) # Wait for page to load
# return True
# else:
# return False
# except NoSuchElementException:
# return False
# # File to store problem details
# file_path = 'problem_data.csv'
# if os.path.exists(file_path):
# os.remove(file_path) # Delete the file if it exists to start fresh
# # Write data to CSV file
# with open(file_path, 'w', newline='', encoding='utf-8') as file:
# writer = csv.writer(file)
# writer.writerow(['Problem ID', 'Problem Name', 'Acceptance Rate', 'Difficulty']) # Column headers
# while True:
# problem_data = extract_problem_data()
# if problem_data:
# writer.writerows(problem_data)
# if not click_next_button():
# break
# driver.quit()
# Source CSV and destination JSON, both resolved against the current
# working directory.
csv_file_path = 'problem_data.csv'
json_file_path = 'problem_data.json'

# Index every CSV row by its "Problem ID" column. Each entry carries the
# raw column values (all strings, as DictReader yields them) plus a
# ready-made "displayText" label of the form "<id>. <name>".
# If the same Problem ID occurs more than once, the later row replaces
# the earlier one.
with open(csv_file_path, 'r', newline='', encoding='utf-8') as csv_file:
    data_dict = {
        row['Problem ID']: {
            "id": row['Problem ID'],
            "name": row['Problem Name'],
            "acceptance": row['Acceptance Rate'],
            "difficulty": row['Difficulty'],
            "displayText": f'{row["Problem ID"]}. {row["Problem Name"]}',
        }
        for row in csv.DictReader(csv_file)
    }

# Dump the whole mapping as pretty-printed JSON (4-space indent).
with open(json_file_path, 'w') as json_file:
    json.dump(data_dict, json_file, indent=4)

print("Conversion to JSON completed successfully!")