# Web Scraping API Performance Evaluation Script
#
# Paste-site metadata (not part of the script): avatar, user_2633811104,
# powershell, 3 years ago, 3.2 kB, 17, Indexable
# Compares three scraping APIs (scraperapi, scrapingbee, scrapingdog) on the
# same Etsy search result pages. For each scraper and each page, the page is
# requested through the API and retried (up to $max_attempts) until the API
# answers with HTTP 200. On success the listing links found in the page are
# counted and appended to a per-scraper output file. One pipe-delimited row
# is written to the host per attempt:
#   Start|Time|Name|Page|Attempt|StatusCode|Expanded Link Count
#
# NOTE: the parsing below relies on the AllElements property of the
# Invoke-WebRequest result, i.e. on Windows PowerShell's full HTML parsing
# (no -UseBasicParsing).
$scrape_folder = "C:\Scrape\Etsy"
$base_url = "https://www.etsy.com"
$max_attempts = 5
$max_pages = 5

# Out-File does not create missing parent directories, so make sure the
# output folder exists before the first append.
if (-not (Test-Path -LiteralPath $scrape_folder -PathType Container))
{
    New-Item -ItemType Directory -Path $scrape_folder -Force | Out-Null
}

Write-Host "Start|Time|Name|Page|Attempt|StatusCode|Expanded Link Count"
for ($use_scraper = 1; $use_scraper -le 3; $use_scraper++)
{
    # Start time is taken once per scraper and repeated on each of its log rows.
    $start_time = Get-Date

    # Per-scraper settings do not depend on the page, so resolve them once
    # here instead of on every page iteration. $api_options holds the
    # provider-specific query parameters; api_key and url are added per page.
    if ($use_scraper -eq 1)
    {
        $name = "scraperapi"
        $api_url = "https://api.scraperapi.com"
        $api_key = "XXXXXXXXXXXX"
        $api_options = @{
            render="true"
            premium="true"
            country_code="us"
        }
    }
    elseif ($use_scraper -eq 2)
    {
        $name = "scrapingbee"
        $api_url = "https://app.scrapingbee.com/api/v1/"
        $api_key = "XXXXXXXXXXXX"
        $api_options = @{
            render_js="true"
            country_code="us"
            premium_proxy="true"
        }
    }
    elseif ($use_scraper -eq 3)
    {
        $name = "scrapingdog"
        $api_url = "https://api.scrapingdog.com/scrape"
        $api_key = "XXXXXXXXXXXX"
        $api_options = @{
            dynamic="true"
            country="us"
            premium="true"
        }
    }
    $output_file_path = $scrape_folder+"\output-"+$name+".txt"

    $pg = 1
    DO
    {
        $search_url_fragment = "search/vintage?q=toys&page="+$pg+"&ref=pagination"
        $page_url = $base_url+"/"+$search_url_fragment

        # With -Method Get, Invoke-WebRequest sends the -Body hashtable as
        # query parameters on $api_url.
        $body = $api_options.Clone()
        $body.api_key = $api_key
        $body.url = $page_url

        $attempt = 1
        $StatusCode = 0
        DO
        {
            # Reset per attempt so a failed attempt always logs a count of 0.
            $expanded_link_count = 0
            Try
            {
                $page_response = Invoke-WebRequest -Uri $api_url -Method Get -Body $body
                $StatusCode = $page_response.StatusCode
                if ($StatusCode -eq 200)
                {
                    # Elements whose class attribute contains this grid class
                    # are treated as the listing tiles of the result page.
                    $page_info = $page_response.AllElements | Where-Object { $_.class -like "*wt-grid__item-xs-6*" }
                    $page_info | ForEach-Object {
                        $listing_info = $_.innerHTML
                        # NOTE(review): '.+' is greedy, so a single match can run
                        # from the first '<a' to the last '>' on the same line,
                        # and '<a' also matches other tag names starting with
                        # 'a'. Left unchanged to keep counts comparable with
                        # earlier runs; confirm whether this is intended.
                        $pattern = '<a.+>'
                        $linkMatches = ($listing_info | Select-String $pattern -AllMatches).Matches
                        foreach ($link_match in $linkMatches)
                        {
                            $listing_link = $link_match.Value
                            # Count only matches that carry a title attribute and
                            # are not the "more like this" button.
                            if (($listing_link.Contains('title=')) -and (-not $listing_link.Contains('more-like-this-button=')))
                            {
                                $expanded_link_count++
                                "Page #$pg, Expanded link #$expanded_link_count" | Out-File -FilePath $output_file_path -Append
                                $listing_link | Out-File -FilePath $output_file_path -Append
                            }
                        }
                    }
                }
            }
            Catch
            {
                # Non-success HTTP answers surface as exceptions that carry the
                # response. Failures without any HTTP response (DNS, timeout,
                # TLS, ...) have none; report those as 0 instead of an empty
                # log column.
                $error_response = $_.Exception.Response
                if ($null -ne $error_response)
                {
                    $StatusCode = [int]$error_response.StatusCode
                }
                else
                {
                    $StatusCode = 0
                }
            }
            Write-Host "$start_time|$(Get-Date)|$name|$pg|$attempt|$StatusCode|$expanded_link_count"
            # Fixed pause after every attempt (successful or not) before the
            # next request is sent.
            Start-Sleep -Seconds 2
            $attempt++
        } Until (($StatusCode -eq 200) -or ($attempt -gt $max_attempts))
        $pg++
    } Until ($pg -gt $max_pages)
}
# (paste-site residue, not part of the script: "Editor is loading...")