# Etsy Playboy Magazine Scrape
#
# avatar
# user_2633811104
# powershell
# 2 years ago
# 4.2 kB
# 44
# Indexable
# Never
# --- Scrape configuration -------------------------------------------------
# Working folder: the output file is written here and WebDriver.dll is loaded from here.
$scrape_folder = 'C:\Scrape\Etsy'
# Pipe-delimited results file (rows are appended to it).
$output_info_path = "$scrape_folder\Etsy-Playboy-data-2021-12-20.txt"
# Site root that every search-page URL is built on.
$base_url = 'https://www.etsy.com'

# Number of the last search-result page to visit (pages are numbered from 1).
$pg_max = 87
# When $true a new ChromeDriver session is created; otherwise an existing
# $ChromeDriver variable is expected to be in scope already.
$newChrome = $true

# Optionally start a new Selenium-driven Chrome session.
if ($newChrome)
{
    # Put the working directory on PATH, but only if it is not listed yet
    # (presumably so the driver executable can be located there - confirm).
    $script_path = $scrape_folder
    $path_entries = $env:Path -split ';'
    if (-not ($path_entries -contains $script_path))
    {
        $env:Path = $env:Path + ";$script_path"
    }

    # Load the Selenium WebDriver assembly that sits in the working directory
    Import-Module -Name "$script_path\WebDriver.dll"

    # Create the ChromeDriver instance used by the rest of the script
    $ChromeDriver = New-Object -TypeName OpenQA.Selenium.Chrome.ChromeDriver
}

# English month names in calendar order; each one is paired with a year and
# looked for inside listing titles.
$month_array = 'January', 'February', 'March', 'April',
               'May', 'June', 'July', 'August',
               'September', 'October', 'November', 'December'

# Issue years of interest, 1990 through 2010 inclusive, kept as strings
# because they are interpolated into wildcard patterns.
$year_array = @(1990..2010 | ForEach-Object { [string]$_ })

# Header row: column names joined with the same '|' delimiter as the data rows.
# It is appended to the output file and echoed to the console.
$column_names = 'item_count', 'item_index', 'month', 'year', 'item_price', 'item_shipping', 'item_value', 'item_title'
$output_info = $column_names -join '|'
Out-File -InputObject $output_info -FilePath $output_info_path -Append
Write-Host -Object $output_info

# Returns the trimmed first capture group of $Pattern inside $Text, or $null
# when $Text is empty or the pattern does not match. Matching is
# case-insensitive.
function Get-FirstCapture
{
    param([string]$Text, [string]$Pattern)

    if ([string]::IsNullOrEmpty($Text)) { return $null }
    $found = [regex]::Match($Text, $Pattern, [System.Text.RegularExpressions.RegexOptions]::IgnoreCase)
    if (-not $found.Success) { return $null }
    return $found.Groups[1].Value.Trim()
}

# Visit search-result pages 1..$pg_max. For every listing whose title contains
# "Playboy" and a "<Month> <Year>" pair from $month_array / $year_array, append
# one pipe-delimited row to $output_info_path and echo it to the console.
for ($pg = 1; $pg -le $pg_max; $pg++)
{
    $search_url_fragment = "search?q=playboy+magazine&page="+$pg+"&ref=pagination"
    $search_page_url = $base_url+"/"+$search_url_fragment

    $ChromeDriver.Navigate().GoToURL($search_page_url)
    # Fixed pause after navigation before the page is queried
    Start-Sleep -Seconds 2
    $search_items = $ChromeDriver.FindElementsByClassName("wt-grid__item-xs-6")

    # Only grid cells that render some text are treated as listings
    $listed_items = @($search_items | Where-Object { $_.Text.Length -gt 0 })
    $item_count = $listed_items.Count

    $i = 1
    foreach ($search_item in $listed_items)
    {
        # Number the listing first so that skipped listings still consume an index
        $item_index = "$pg-$i"
        $i++

        $search_item_text = $search_item.Text
        if ($search_item_text.Contains("FREE shipping"))
        {
            $item_shipping = "Free"
        }
        else
        {
            $item_shipping = "Paid"
        }

        $search_item_innerHTML = $search_item.getAttribute("innerHTML")

        # Price: contents of the FIRST currency-value span. The lazy quantifier
        # stops at that span's own closing tag even when several spans share a line.
        $item_price = Get-FirstCapture $search_item_innerHTML '<span class="currency-value">(.+?)</span>'
        if ($item_price)
        {
            # '\$' is a literal dollar sign; an unescaped '$' is the regex end anchor
            $item_price = ($item_price -replace '\$','').Trim()
        }

        # Parse culture-independently; skip the listing instead of reusing the
        # previous listing's price when nothing numeric was found.
        $item_value = [double]0
        $number_style = [System.Globalization.NumberStyles]::Float -bor [System.Globalization.NumberStyles]::AllowThousands
        $price_ok = [double]::TryParse($item_price, $number_style, [System.Globalization.CultureInfo]::InvariantCulture, [ref]$item_value)
        if (-not $price_ok)
        {
            Write-Warning "Item ${item_index}: no numeric price found; skipped"
            continue
        }

        # Listings with free shipping get 8.7 subtracted from the price
        # (presumably an estimate of the shipping cost folded into it - confirm).
        # Rounded to cents so floating-point noise does not reach the output.
        if ($item_shipping -eq "Free") { $item_value = [math]::Round($item_value - 8.7, 2) }

        # Title: text between the first '">' and the next '</' inside the first
        # <h3>...</h3> span that mentions the v2-listing-card__title class.
        $title_element = [regex]::Match($search_item_innerHTML, '<h3[\S\s]+?v2-listing-card__title[\S\s]+?</h3>', [System.Text.RegularExpressions.RegexOptions]::IgnoreCase).Value
        $item_title = Get-FirstCapture $title_element '">([\S\s]*?)</'
        if ([string]::IsNullOrEmpty($item_title))
        {
            Write-Warning "Item ${item_index}: no title found; skipped"
            continue
        }

        if ($item_title -notlike "*Playboy*") { continue }

        # One row per "<Month> <Year>" pair found in the title
        foreach ($year in $year_array)
        {
            foreach ($month in $month_array)
            {
                if ($item_title -like "*$month $year*")
                {
                    $output_info = "$item_count|$item_index|$month|$year|$item_price|$item_shipping|$item_value|$item_title"
                    $output_info | Out-File -FilePath $output_info_path -Append
                    Write-Host $output_info
                }
            }
        }
    }
}