Web Scraping API Performance Evaluation Script
user_2633811104
powershell
3 years ago
3.2 kB
17
Indexable
# Web scraping API performance evaluation.
#
# Fetches the same Etsy search result pages through three scraping APIs
# (scraperapi, scrapingbee, scrapingdog), retrying each page up to
# $max_attempts times, and prints one pipe-delimited line per attempt:
#   Start|Time|Name|Page|Attempt|StatusCode|Expanded Link Count
# Matching listing links are appended to output-<name>.txt in $scrape_folder.
#
# NOTE(review): the parsing below relies on the AllElements collection of the
# Invoke-WebRequest response; confirm the target PowerShell version still
# provides parsed HTML (it is tied to Windows PowerShell's non-basic parsing).

$scrape_folder = "C:\Scrape\Etsy"
$base_url = "https://www.etsy.com"
$max_attempts = 5
$max_pages = 5

# Out-File does not create missing parent folders. Without this, the write
# fails inside the Try block and is misreported as a failed request.
if (-not (Test-Path -LiteralPath $scrape_folder)) {
    New-Item -ItemType Directory -Path $scrape_folder -Force | Out-Null
}

# One entry per API under test. 'params' holds the provider-specific query
# parameters; the page URL is added per request. Replace the api_key
# placeholders with real keys before running.
$scrapers = @(
    @{
        name    = "scraperapi"
        api_url = "https://api.scraperapi.com"
        params  = @{
            api_key      = "XXXXXXXXXXXX"
            render       = "true"
            premium      = "true"
            country_code = "us"
        }
    },
    @{
        name    = "scrapingbee"
        api_url = "https://app.scrapingbee.com/api/v1/"
        params  = @{
            api_key       = "XXXXXXXXXXXX"
            render_js     = "true"
            country_code  = "us"
            premium_proxy = "true"
        }
    },
    @{
        name    = "scrapingdog"
        api_url = "https://api.scrapingdog.com/scrape"
        params  = @{
            api_key = "XXXXXXXXXXXX"
            dynamic = "true"
            country = "us"
            premium = "true"
        }
    }
)

# NOTE(review): '.+' is greedy and '<a' also matches tags such as <article>,
# so one match can span several tags on a line. Kept unchanged so link counts
# stay comparable with earlier runs; tighten only if per-anchor counts matter.
$pattern = '<a.+>'

Write-Host "Start|Time|Name|Page|Attempt|StatusCode|Expanded Link Count"

foreach ($scraper in $scrapers) {
    # Start time is taken once per API and repeated on every log line.
    $start_time = Get-Date
    $name = $scraper.name
    $api_url = $scraper.api_url
    $output_file_path = $scrape_folder+"\output-"+$name+".txt"

    $pg = 1
    DO {
        $search_url_fragment = "search/vintage?q=toys&page="+$pg+"&ref=pagination"
        $page_url = $base_url+"/"+$search_url_fragment

        # Copy the provider parameters so the shared table is never mutated.
        $body = $scraper.params.Clone()
        $body['url'] = $page_url

        $attempt = 1
        $StatusCode = 0
        $expanded_link_count = 0
        DO {
            Try {
                $page_response = Invoke-WebRequest -Uri $api_url -Method Get -Body $body
                $StatusCode = $page_response.StatusCode
                if ($StatusCode -eq 200) {
                    $page_info = $page_response.AllElements |
                        Where-Object { $_.class -like "*wt-grid__item-xs-6*" }

                    $expanded_link_count = 0
                    # Collect the lines first and write once per page: fewer
                    # file opens, and nothing is written if parsing throws.
                    $output_lines = New-Object System.Collections.Generic.List[string]

                    foreach ($element in $page_info) {
                        $listing_info = $element.innerHTML
                        # Select-String is case-insensitive by default, so
                        # upper-case tag names in innerHTML are matched too.
                        $linkMatches = ($listing_info | Select-String $pattern -AllMatches).Matches
                        foreach ($linkMatch in $linkMatches) {
                            $listing_link = [string]$linkMatch
                            if (($listing_link.Contains('title=')) -and (-not $listing_link.Contains('more-like-this-button='))) {
                                $expanded_link_count++
                                $output_lines.Add("Page #$pg, Expanded link #$expanded_link_count")
                                $output_lines.Add($listing_link)
                            }
                        }
                    }

                    # Skip the write when there are no links so no empty file is created.
                    if ($output_lines.Count -gt 0) {
                        $output_lines | Out-File -FilePath $output_file_path -Append
                    }
                }
            }
            Catch {
                $error_response = $_.Exception.Response
                if ($null -ne $error_response) {
                    # HTTP-level failure: report the numeric status code.
                    $StatusCode = $error_response.StatusCode.value__
                }
                else {
                    # No HTTP response (DNS failure, timeout, file error, ...):
                    # log status 0 and surface the cause instead of a blank.
                    $StatusCode = 0
                    Write-Warning "$name page $pg attempt $attempt failed without an HTTP response: $($_.Exception.Message)"
                }
            }

            Write-Host "$start_time|$(Get-Date)|$name|$pg|$attempt|$StatusCode|$expanded_link_count"
            # Pause between requests, after successes as well as failures.
            Start-Sleep -Seconds 2
            $attempt++
        } Until (($StatusCode -eq 200) -or ($attempt -gt $max_attempts))

        $pg++
    } Until ($pg -gt $max_pages)
}
Editor is loading...