-
Notifications
You must be signed in to change notification settings - Fork 0
/
04_image_scraper.py
52 lines (33 loc) · 1.32 KB
/
04_image_scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import json
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Scrape the product name and main product image URL from every StockX
# product page listed in nike_stockx_url_list.txt, then save the collected
# records as a JSON list in nike_image_data.txt.

# read in json url list
with open('nike_stockx_url_list.txt', 'r') as f:
    nike_stockx_url_list = json.load(f)

# run and collect data
sneaker_images = []
total_urls = len(nike_stockx_url_list)
driver = webdriver.Chrome(executable_path='./chromedriver')
try:
    # One explicit-wait helper (10 second timeout) is reused for every page.
    wait = WebDriverWait(driver, 10)

    # enumerate() yields each URL's own position in the list. Looking the
    # position up with list.index(url) rescans the list on every iteration
    # and returns the FIRST match, so duplicate URLs would share one id.
    for id_counter, url in enumerate(nike_stockx_url_list):
        driver.get(url)

        # Skip URLs for which the browser ended up on the 404 page.
        if driver.current_url == 'https://stockx.com/404':
            continue

        # Block until each element is visible; wait.until raises if the
        # element does not become visible within the timeout.
        prod_name = wait.until(EC.visibility_of_element_located(
            (By.XPATH, "//h1[@class='name']"))).text
        img_src = wait.until(EC.visibility_of_element_located(
            (By.XPATH, "//img[@data-testid='product-detail-image']")
        )).get_attribute('src')

        sneaker_images.append({
            "id": id_counter,
            "url": url,
            "product_name": prod_name,
            "image_url": img_src,
        })
        print(f'Progress: {id_counter} of {total_urls} Completed')
finally:
    # Always shut the browser down, even when a page load or wait raised,
    # so no Chrome/chromedriver process is left running.
    driver.quit()

# save out data
with open('nike_image_data.txt', 'w') as f:
    json.dump(sneaker_images, f)