Untitled
unknown
plain_text
2 years ago
1.8 kB
7
Indexable
"""Screenshot product images from Amazon search result pages.

Usage: python <script> ASSET_ID

ASSET_ID is used both as the search keyword in the query URL and as the
name of the output directory that receives the ``.jpg`` screenshots.
"""
from selenium import webdriver
from bs4 import BeautifulSoup
import time
from selenium.webdriver.common.keys import Keys
from pdb import set_trace as bb
from selenium.webdriver.common.by import By
from multiprocessing import Pool
import random
import os
import numpy as np
from selenium.webdriver.chrome.options import Options
import sys

CHROMEDRIVER_PATH = "/usr/lib/chromium-browser/chromedriver"
SEARCH_URL_TEMPLATE = "https://www.amazon.com/s?k={}&page="
PAGE_COUNT = 500    # result pages requested, numbered 1..PAGE_COUNT
SCROLL_STEPS = 8    # PAGE_DOWN key presses sent to each result page

# Single-pass equivalent of the file-name clean-up: spaces become
# underscores, '&' becomes 'and', slashes and quotes are dropped.
_FILENAME_TABLE = str.maketrans(
    {" ": "_", "/": None, "&": "and", "'": None, '"': None}
)


def _image_filename(alt_text, suffix):
    """Return the ``.jpg`` file name built from an image's alt text.

    ``suffix`` is appended after an underscore before the extension.
    """
    return (alt_text + "_" + str(suffix) + ".jpg").translate(_FILENAME_TABLE)


def _scroll_page(driver):
    """Send SCROLL_STEPS PAGE_DOWN key presses to the current page's body."""
    # Presumably this gives lazily loaded thumbnails a chance to appear
    # before the page source is read -- TODO confirm.
    body = driver.find_element(By.CSS_SELECTOR, "body")
    for _ in range(SCROLL_STEPS):
        time.sleep(0.1)
        body.send_keys(Keys.PAGE_DOWN)


def _save_page_images(driver, out_dir):
    """Screenshot every ``img.s-image`` found on the current page.

    Each image URL is opened in the browser and the resulting ``<img>``
    element is screenshotted into ``out_dir``. A failure on one image is
    reported on stderr and the remaining images are still processed.
    """
    soup = BeautifulSoup(driver.page_source, "html.parser")
    for img in soup.find_all("img", class_="s-image"):
        alt_text = img.get("alt")
        link = img.get("src")
        if alt_text is None or link is None:
            # Nothing to name the file after, or nothing to open.
            continue

        # Random 0-99 suffix, kept from the original naming scheme.
        # NOTE(review): two images with the same alt text can still draw
        # the same suffix and overwrite each other.
        name = _image_filename(alt_text, np.random.randint(100))
        # Presumably swaps the thumbnail for a larger rendition of the
        # same image -- TODO confirm against the site's URL scheme.
        link = link.replace("UL320_.jpg", "UL2000_.jpg")

        try:
            driver.get(link)
            element = driver.find_element(By.XPATH, "/html/body/img")
            element.screenshot(os.path.join(out_dir, name))
            time.sleep(0.25)
        except Exception as exc:
            # Best effort: skip this image, but say why. Catching
            # Exception (not a bare except) keeps Ctrl+C working.
            print("skipping {}: {}".format(link, exc), file=sys.stderr)


def main():
    """Crawl PAGE_COUNT search result pages and save their images."""
    if len(sys.argv) < 2:
        sys.exit("usage: {} ASSET_ID".format(sys.argv[0]))
    asset_id = sys.argv[1]

    # Create the output directory without going through a shell, so the
    # argument cannot be interpreted as shell syntax and names containing
    # spaces work; an already existing directory is fine.
    os.makedirs(asset_id, exist_ok=True)

    # NOTE(review): asset_id is inserted into the URL unescaped, exactly
    # as before; characters such as '&' would alter the query string.
    search_url = SEARCH_URL_TEMPLATE.format(asset_id)

    options = Options()
    # options.headless = True
    # NOTE(review): newer Selenium releases expect a Service object rather
    # than a positional driver path -- confirm against the installed version.
    driver = webdriver.Chrome(CHROMEDRIVER_PATH, options=options)
    try:
        for page in range(1, PAGE_COUNT + 1):
            driver.get(search_url + str(page))
            _scroll_page(driver)
            _save_page_images(driver, asset_id)
    finally:
        # Always shut the browser down, even when the crawl is interrupted
        # or a page load fails.
        driver.quit()


if __name__ == "__main__":
    main()
Editor is loading...