# Web Scraping API Performance Evaluation Script
# user_2633811104
# powershell
# 4 years ago
# 3.2 kB
# 30
# Indexable
# Compares three scraping APIs (scraperapi, scrapingbee, scrapingdog) by
# fetching the same Etsy search result pages through each of them.
#
# Output:
#   * Host: a header line followed by one pipe-delimited line per attempt
#     (Start|Time|Name|Page|Attempt|StatusCode|Expanded Link Count).
#   * File: for every page that returned HTTP 200, the matching listing
#     links are appended to "<scrape_folder>\output-<name>.txt".
$scrape_folder = "C:\Scrape\Etsy"
$base_url = "https://www.etsy.com"
$max_attempts = 5   # tries per page before giving up on that page
$max_pages = 5      # search result pages requested per scraper

# Out-File does not create missing directories. Without this guard a missing
# folder would surface inside the Try block below, be logged as a failed
# request and trigger needless (billable) retries against the API.
if (-not (Test-Path -LiteralPath $scrape_folder -PathType Container))
{
    New-Item -ItemType Directory -Path $scrape_folder -Force | Out-Null
}

# Matches exactly one opening <a ...> tag. Quoted attribute values are consumed
# as a unit so a '>' inside a value does not end the match early. A greedy
# '<a.+>' would instead run to the last '>' on the line, merging several links
# into one match and also picking up tags such as <abbr> or <article>.
$link_pattern = '<a\s(?:[^>"'']|"[^"]*"|''[^'']*'')*>'

# One entry per API under test. "options" holds the provider-specific query
# parameters; api_key and url are added per request. The api_key values are
# placeholders and must be replaced with real keys before running.
$scrapers = @(
    @{
        name = "scraperapi"
        api_url = "https://api.scraperapi.com"
        api_key = "XXXXXXXXXXXX"
        options = @{
            render = "true"
            premium = "true"
            country_code = "us"
        }
    },
    @{
        name = "scrapingbee"
        api_url = "https://app.scrapingbee.com/api/v1/"
        api_key = "XXXXXXXXXXXX"
        options = @{
            render_js = "true"
            country_code = "us"
            premium_proxy = "true"
        }
    },
    @{
        name = "scrapingdog"
        api_url = "https://api.scrapingdog.com/scrape"
        api_key = "XXXXXXXXXXXX"
        options = @{
            dynamic = "true"
            country = "us"
            premium = "true"
        }
    }
)

Write-Host "Start|Time|Name|Page|Attempt|StatusCode|Expanded Link Count"
foreach ($scraper in $scrapers)
{
    $name = $scraper.name
    $api_url = $scraper.api_url
    $output_file_path = $scrape_folder+"\output-"+$name+".txt"

    # Start time is taken once per scraper and repeated on each of its log lines.
    $start_time = Get-Date
    $pg = 1
    DO
    {
        $search_url_fragment = "search/vintage?q=toys&page="+$pg+"&ref=pagination"
        $page_url = $base_url+"/"+$search_url_fragment

        # With -Method Get the hashtable is sent as query string parameters.
        $body = $scraper.options.Clone()
        $body.api_key = $scraper.api_key
        $body.url = $page_url

        $attempt = 1
        $StatusCode = 0
        $expanded_link_count = 0
        DO
        {
            Try
            {
                $page_response = Invoke-WebRequest -Uri $api_url -Method Get -Body $body
                $StatusCode = $page_response.StatusCode
                if ($StatusCode -eq 200)
                {
                    # NOTE(review): AllElements relies on the full (non-basic) HTML
                    # parsing of Windows PowerShell - confirm the target shell.
                    $page_info = $page_response.AllElements | Where-Object { $_.class -like "*wt-grid__item-xs-6*" }
                    $expanded_link_count = 0
                    foreach ($grid_item in $page_info)
                    {
                        $listing_info = $grid_item.innerHTML
                        $link_matches = ($listing_info | Select-String $link_pattern -AllMatches).Matches
                        foreach ($link_match in $link_matches)
                        {
                            $listing_link = $link_match.Value
                            # Count anchors that carry a title attribute, skipping the
                            # "more like this" button anchors.
                            if (($listing_link.Contains('title=')) -and (-not $listing_link.Contains('more-like-this-button=')))
                            {
                                $expanded_link_count++
                                "Page #$pg, Expanded link #$expanded_link_count" | Out-File -FilePath $output_file_path -Append
                                $listing_link | Out-File -FilePath $output_file_path -Append
                            }
                        }
                    }
                }
            }
            Catch
            {
                # HTTP errors carry a response with a status code; other failures
                # (e.g. no connection) do not, so report 0 instead of a blank column.
                $error_response = $_.Exception.Response
                if ($null -ne $error_response)
                {
                    $StatusCode = $error_response.StatusCode.value__
                }
                else
                {
                    $StatusCode = 0
                }
            }
            Write-Host "$start_time|$(Get-Date)|$name|$pg|$attempt|$StatusCode|$expanded_link_count"
            # Pause after every attempt, successful or not, before the next request.
            Start-Sleep -Seconds 2
            $attempt++
        } Until (($StatusCode -eq 200) -or ($attempt -gt $max_attempts))
        $pg++
    } Until ($pg -gt $max_pages)
}