Etsy Playboy Magazine Scrape
user_2633811104
powershell
3 years ago
4.2 kB
82
Indexable
# Scrapes Etsy search results for "playboy magazine" through Selenium's ChromeDriver
# and appends one pipe-delimited row to $output_info_path for every listing whose
# title contains "Playboy" together with a "<Month> <Year>" pair from the arrays below.
#
# Output columns:
#   item_count|item_index|month|year|item_price|item_shipping|item_value|item_title
#
# Requires WebDriver.dll (and a matching chromedriver) inside $scrape_folder.

$scrape_folder = "C:\Scrape\Etsy"
$output_info_path = $scrape_folder+"\Etsy-Playboy-data-2021-12-20.txt"
$base_url = "https://www.etsy.com"
$pg_max = 87

# When $false the script does not create a driver and relies on a $ChromeDriver
# object that already exists in the current session.
$newChrome = $true

# Amount subtracted from the listed price when a listing advertises free shipping.
# NOTE(review): presumably an estimated shipping cost baked into the price - confirm.
$free_shipping_adjustment = 8.7

if ($newChrome) {
    # Add the working directory to the environment path
    $script_path = $scrape_folder
    if (($env:Path -split ';') -notcontains $script_path) {
        $env:Path += ";$script_path"
    }

    # Import Selenium to PowerShell using the Import-Module cmdlet
    Import-Module "$($script_path)\WebDriver.dll"

    # Create a new ChromeDriver Object instance
    $ChromeDriver = New-Object OpenQA.Selenium.Chrome.ChromeDriver
}

$month_array = @(
    "January", "February", "March", "April", "May", "June",
    "July", "August", "September", "October", "November", "December"
)

$year_array = @(
    "1990", "1991", "1992", "1993", "1994", "1995", "1996",
    "1997", "1998", "1999", "2000", "2001", "2002", "2003",
    "2004", "2005", "2006", "2007", "2008", "2009", "2010"
)

# The first currency-value span of a card; the capture group holds only the text
# between the tags, so a second span on the same line cannot leak into the price.
$price_pattern = '<span class="currency-value">([^<]+)</span>'

# Lazy quantifiers stop at the first </h3> that follows the title class instead of
# running to the last </h3> in the card.
$title_outer_pattern = '<h3[\S\s]+?v2-listing-card__title[\S\s]+?</h3>'
$title_inner_pattern = '">[\S\s]+</'

# Header row (appended, like every data row).
$output_info = "item_count|item_index|month|year|item_price|item_shipping|item_value|item_title"
$output_info | Out-File -FilePath $output_info_path -Append
Write-Host $output_info

for ($pg = 1; $pg -le $pg_max; $pg++) {
    $search_url_fragment = "search?q=playboy+magazine&page="+$pg+"&ref=pagination"
    $search_page_url = $base_url+"/"+$search_url_fragment

    $ChromeDriver.Navigate().GoToURL($search_page_url)
    Start-Sleep -Seconds 2

    $search_items = $ChromeDriver.FindElementsByClassName("wt-grid__item-xs-6")

    # Only grid items that render some text are counted and indexed.
    $item_count = @($search_items | Where-Object { $_.Text.Length -gt 0 }).Count

    $i = 1
    foreach ($search_item in $search_items) {
        $search_item_text = $search_item.Text
        if (-not ($search_item_text.Length -gt 0)) {
            continue
        }

        # Take the index and advance the counter up front so that the guard clauses
        # below can skip an item without disturbing the numbering of later items.
        $item_index = "$pg-$i"
        $i++

        if ($search_item_text.Contains("FREE shipping")) {
            $item_shipping = "Free"
        } else {
            $item_shipping = "Paid"
        }

        $search_item_innerHTML = $search_item.getAttribute("innerHTML")

        # --- price -------------------------------------------------------------
        $priceMatches = ($search_item_innerHTML | Select-String -Pattern $price_pattern).Matches
        if ($null -eq $priceMatches -or $priceMatches.Count -eq 0) {
            # Without this guard the previous item's price would be reused.
            Write-Warning "Item ${item_index}: no price found; skipping."
            continue
        }

        $item_price = $priceMatches[0].Groups[1].Value.Trim()
        # -replace takes a regex: an unescaped '$' is the end-of-string anchor and
        # would remove nothing, so the dollar sign has to be escaped.
        $item_price = $item_price -replace '\$',''

        try {
            $item_value = [double]$item_price
        } catch {
            Write-Warning "Item ${item_index}: cannot parse price '$item_price'; skipping."
            continue
        }

        if ($item_shipping -like "Free") {
            # Round to cents so binary floating-point noise does not reach the output.
            $item_value = [math]::Round($item_value - $free_shipping_adjustment, 2)
        }

        # --- title -------------------------------------------------------------
        $titleMatches1 = ($search_item_innerHTML | Select-String -Pattern $title_outer_pattern).Matches
        if ($null -eq $titleMatches1 -or $titleMatches1.Count -eq 0) {
            Write-Warning "Item ${item_index}: no title element found; skipping."
            continue
        }

        $titleMatches2 = ($titleMatches1[0].Value | Select-String -Pattern $title_inner_pattern).Matches
        if ($null -eq $titleMatches2 -or $titleMatches2.Count -eq 0) {
            Write-Warning "Item ${item_index}: title element has no text; skipping."
            continue
        }

        # Strip the tag remnants left around the text by the inner pattern.
        $item_title = $titleMatches2[0].Value
        $item_title = $item_title -replace '">',''
        $item_title = $item_title -replace '</',''
        $item_title = $item_title.Trim()

        # --- output ------------------------------------------------------------
        if ($item_title -like "*Playboy*") {
            foreach ($year in $year_array) {
                foreach ($month in $month_array) {
                    $pattern = "*$month $year*"
                    if ($item_title -like $pattern) {
                        $output_info = "$item_count|$item_index|$month|$year|$item_price|$item_shipping|$item_value|$item_title"
                        $output_info | Out-File -FilePath $output_info_path -Append
                        Write-Host $output_info
                    }
                }
            }
        }
    }
}
Editor is loading...