# Untitled
# unknown
# plain_text
# 3 years ago
# 1.8 kB
# 8
# Indexable
from selenium import webdriver
from bs4 import BeautifulSoup
import time
from selenium.webdriver.common.keys import Keys
from pdb import set_trace as bb
from selenium.webdriver.common.by import By
from multiprocessing import Pool
import random
import os
import numpy as np
from selenium.webdriver.chrome.options import Options
import sys
# Single-pass character mapping used to turn an image's alt text into a file
# name: spaces become underscores, '&' becomes 'and', and slashes and quote
# characters are dropped.
FILENAME_TRANSLATION = str.maketrans({
    " ": "_",
    "/": None,
    "&": "and",
    "'": None,
    '"': None,
})


def make_image_filename(alt_text, suffix):
    """Return the ``.jpg`` file name used for an image.

    Args:
        alt_text: The image's ``alt`` attribute text.
        suffix: Value appended after an underscore to reduce name clashes
            between images that share the same alt text.

    Returns:
        ``<alt_text>_<suffix>.jpg`` with spaces replaced by underscores,
        ``&`` replaced by ``and``, and ``/``, ``'`` and ``"`` removed.
    """
    return "{}_{}.jpg".format(alt_text, suffix).translate(FILENAME_TRANSLATION)


def scrape_images(asset_id, max_pages=500):
    """Screenshot every ``img.s-image`` found on the search result pages.

    For each result page the browser is scrolled down, the page source is
    parsed, and each matching image URL is opened on its own and captured
    as a screenshot inside the directory named ``asset_id``.

    Args:
        asset_id: Search term; also used as the output directory name.
        max_pages: Number of result pages to visit, starting at page 1.
    """
    # os.makedirs avoids handing a command-line argument to a shell and does
    # not fail when the directory is already present.
    os.makedirs(asset_id, exist_ok=True)

    search_url = "https://www.amazon.com/s?k={}&page=".format(asset_id)

    options = Options()
    # NOTE(review): the chromedriver path is passed positionally exactly as in
    # the original script; newer Selenium releases may expect a Service
    # object instead -- confirm against the installed version.
    driver = webdriver.Chrome("/usr/lib/chromium-browser/chromedriver", options=options)
    try:
        for page in range(1, max_pages + 1):
            driver.get(search_url + str(page))

            # Page down a few times before reading the source (presumably so
            # that lazily loaded thumbnails are present in the markup).
            body = driver.find_element(By.CSS_SELECTOR, 'body')
            for _ in range(8):
                time.sleep(0.1)
                body.send_keys(Keys.PAGE_DOWN)

            soup = BeautifulSoup(driver.page_source, 'html.parser')
            for img in soup.find_all('img', class_='s-image'):
                alt_text = img.get('alt')
                link = img.get('src')
                if alt_text is None or link is None:
                    # Nothing to name the file after, or nothing to fetch.
                    continue

                name = make_image_filename(alt_text, np.random.randint(100))
                # Swap the size token in the URL (presumably selects a larger
                # rendition of the same image).
                link = link.replace('UL320_.jpg', 'UL2000_.jpg')
                try:
                    driver.get(link)
                    item = driver.find_element(By.XPATH, '/html/body/img')
                    item.screenshot(asset_id + '/' + name)
                    time.sleep(0.25)
                except Exception as exc:
                    # Best effort: report the failure and move on to the next
                    # image. KeyboardInterrupt is deliberately not caught so
                    # the crawl can still be stopped with Ctrl-C.
                    print("skipping {}: {}".format(link, exc), file=sys.stderr)
                    continue
    finally:
        # Always shut the browser down, even when the crawl is interrupted.
        driver.quit()


if __name__ == "__main__":
    if len(sys.argv) < 2:
        sys.exit("usage: {} <asset_id>".format(sys.argv[0]))
    scrape_images(sys.argv[1])