# --- Run configuration ---
# Working folder; the output file is created inside it.
$scrape_folder = 'C:\Scrape\Etsy'
# Pipe-delimited results file.
$output_info_path = "$scrape_folder\Etsy-Playboy-data-2021-12-20.txt"
# Site root that the search URLs are built from.
$base_url = 'https://www.etsy.com'
# Last search-results page to visit.
$pg_max = 87
# When true, a fresh ChromeDriver instance is created.
$newChrome = $true
# Start a new Selenium-driven Chrome session when requested.
if ($newChrome)
{
    # Append the working directory to this session's PATH unless it is already listed.
    $script_path = $scrape_folder
    $path_entries = $env:Path -split ';'
    if ($path_entries -notcontains $script_path)
    {
        $env:Path = $env:Path + ";$script_path"
    }

    # Load the Selenium WebDriver assembly from the working directory.
    Import-Module -Name "$script_path\WebDriver.dll"

    # Create the ChromeDriver instance used for all page navigation.
    $ChromeDriver = New-Object -TypeName OpenQA.Selenium.Chrome.ChromeDriver
}
# Month names looked for in listing titles, in calendar order.
$month_array = @(
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December'
)
# Years looked for in listing titles: "1990" through "2010", stored as strings.
$year_array = @(1990..2010 | ForEach-Object { "$_" })
# Column header row: appended to the output file, then echoed to the console.
$output_info = "item_count|item_index|month|year|item_price|item_shipping|item_value|item_title"
Out-File -InputObject $output_info -FilePath $output_info_path -Append
Write-Host -Object $output_info
# Walk every search-results page and append one pipe-delimited row for each
# listing whose title contains "Playboy" and a "<Month> <Year>" pair taken
# from $month_array / $year_array.
#
# Reads:  $ChromeDriver, $base_url, $pg_max, $month_array, $year_array,
#         $output_info_path
# Writes: rows appended to $output_info_path and echoed with Write-Host;
#         listings whose price or title cannot be extracted are skipped with
#         a warning instead of being reported with the previous item's values.

# Fail fast when there is no browser session to drive (for example when
# $newChrome is false and no driver object exists in this session).
if ($null -eq $ChromeDriver)
{
    throw 'No ChromeDriver session is available; set $newChrome = $true or create $ChromeDriver first.'
}

# Amount subtracted from the price of "FREE shipping" listings to obtain
# item_value.
$assumed_shipping_cost = 8.7

for ($pg = 1; $pg -le $pg_max; $pg++)
{
    $search_url_fragment = "search?q=playboy+magazine&page="+$pg+"&ref=pagination"
    $search_page_url = $base_url+"/"+$search_url_fragment
    $ChromeDriver.Navigate().GoToURL($search_page_url)
    # Fixed 2-second pause after navigation (presumably to let the page finish rendering).
    Start-Sleep -Seconds 2

    $search_items = $ChromeDriver.FindElementsByClassName("wt-grid__item-xs-6")
    # Only grid cells that carry text are counted and processed.
    $item_count = @($search_items | Where-Object { $_.Text.Length -gt 0 }).Count

    $i = 1
    foreach ($search_item in $search_items)
    {
        $search_item_text = $search_item.Text
        if ($search_item_text.Length -le 0)
        {
            continue
        }

        # Index is "<page>-<position among non-empty cells>"; the counter is
        # advanced here so skipped items still consume their position.
        $item_index = "$pg-$i"
        $i++

        if ($search_item_text.Contains("FREE shipping"))
        {
            $item_shipping = "Free"
        }
        else
        {
            $item_shipping = "Paid"
        }

        $search_item_innerHTML = $search_item.getAttribute("innerHTML")

        # --- Price ---
        # [^<]+ keeps the capture inside the first currency-value span even
        # when several spans share one line (a greedy .+ would run to the
        # last </span> on that line).
        if ($search_item_innerHTML -notmatch '<span class="currency-value">([^<]+)</span>')
        {
            Write-Warning "Item ${item_index}: no price found; skipped."
            continue
        }
        # '\$' removes a literal dollar sign; an unescaped '$' is the regex
        # end-of-string anchor and would remove nothing.
        $item_price = ($Matches[1] -replace '\$','').Trim()
        # -as yields $null instead of raising an error when the text is not numeric.
        $item_value = $item_price -as [double]
        if ($item_price.Length -eq 0 -or $null -eq $item_value)
        {
            Write-Warning "Item ${item_index}: price '$item_price' is not numeric; skipped."
            continue
        }
        if ($item_shipping -like "Free")
        {
            $item_value = $item_value - $assumed_shipping_cost
        }

        # --- Title ---
        # Reset first so a failed match can never reuse the previous item's title.
        $item_title = $null
        if ($search_item_innerHTML -match '<h3[\S\s]+v2-listing-card__title[\S\s]+</h3>')
        {
            $title_block = $Matches[0]
            if ($title_block -match '">[\S\s]+</')
            {
                # Strip the tag remnants around the text, then surrounding whitespace.
                $item_title = (($Matches[0] -replace '">','') -replace '</','').Trim()
            }
        }
        if ([string]::IsNullOrEmpty($item_title))
        {
            Write-Warning "Item ${item_index}: no title found; skipped."
            continue
        }

        if ($item_title -notlike "*Playboy*")
        {
            continue
        }

        # One row is written for every month/year pair the title mentions.
        foreach ($year in $year_array)
        {
            foreach ($month in $month_array)
            {
                if ($item_title -like "*$month $year*")
                {
                    $output_info = "$item_count|$item_index|$month|$year|$item_price|$item_shipping|$item_value|$item_title"
                    $output_info | Out-File -FilePath $output_info_path -Append
                    Write-Host $output_info
                }
            }
        }
    }
}