Untitled
unknown
plain_text
2 years ago
3.0 kB
5
Indexable
# Record the Facebook follower count of each configured company page.
#
# For every page in `urls` the script starts a ScrapingBot "facebookProfile"
# scrape, polls the API until the result is ready, and inserts one row
# (Art, Anzahl, Datum) into the intranet statistics table.

import datetime
import json
import os
from time import sleep

import pyodbc
import requests

# Define the urls and company names (page URL -> short name used in "Art").
urls = {
    'https://www.facebook.com/OberrheinHandelsUnion/': 'OHU',
    'https://www.facebook.com/BLSIffezheim/': 'BLS',
}

apiEndPoint = "http://api.scraping-bot.io/scrape/data-scraper"
apiEndPointResponse = "http://api.scraping-bot.io/scrape/data-scraper-response?"

# NOTE(review): the database password and the API key below are committed in
# source. They are kept only as fallbacks so existing deployments keep
# working; set the environment variables, rotate the secrets, and then remove
# the literal defaults.
DB_CONNECTION_STRING = os.environ.get(
    "STATS_DB_CONNECTION_STRING",
    'DRIVER={ODBC Driver 17 for SQL Server};'
    'SERVER=192.168.25.172,1433;'
    'DATABASE=OHU;'
    'UID=portaluser;'
    'PWD=F!5&P$VemTK42D;',
)

# Define the INSERT query
query = "INSERT INTO [W2019PortalDB].[intranetstatistik].dbo.statistik (Art, Anzahl, Datum) VALUES (?, ?, ?)"

# Define the username, API key and scraper
username = os.environ.get("SCRAPINGBOT_USERNAME", 'ceased')
apiKey = os.environ.get("SCRAPINGBOT_API_KEY", 'W2vy3Mm6OvA0k5VLUkNYhVsWs')
scraper = 'facebookProfile'

# Seconds to wait for any single HTTP call before giving up.
REQUEST_TIMEOUT = 30
# Sleep 5s between polls: social-media scraping can take quite long to
# complete and the API returns an error when polled too quickly.
POLL_INTERVAL_SECONDS = 5
# Upper bound on polls per page (120 * 5s = 10 minutes) so a stuck scrape
# cannot hang the script forever.
MAX_POLL_ATTEMPTS = 120


def start_scrape(url):
    """Submit a scrape job for *url* and return its response id.

    Returns None (after printing the API's answer) when the request is not
    accepted or the answer carries no ``responseId``.

    Raises:
        requests.RequestException: on network failure or timeout.
    """
    payload = json.dumps({"url": url, "scraper": scraper})
    headers = {'Content-Type': "application/json"}
    response = requests.post(
        apiEndPoint,
        data=payload,
        auth=(username, apiKey),
        headers=headers,
        timeout=REQUEST_TIMEOUT,
    )
    if response.status_code != 200:
        print(response.text)
        return None

    try:
        body = response.json()
    except ValueError:
        # A 200 answer that is not JSON cannot contain a response id.
        print(response.text)
        return None

    print(body)
    response_id = body.get("responseId") if isinstance(body, dict) else None
    if response_id is not None:
        print(response_id)
    return response_id


def wait_for_result(response_id):
    """Poll the API until the scrape identified by *response_id* finishes.

    Returns the list of scraped records, or None when the API reports an
    error or the job is still not finished after MAX_POLL_ATTEMPTS polls.

    Raises:
        requests.RequestException: on network failure or timeout.
    """
    query_params = {"scraper": scraper, "responseId": response_id}
    for _ in range(MAX_POLL_ATTEMPTS):
        sleep(POLL_INTERVAL_SECONDS)
        final_response = requests.get(
            apiEndPointResponse,
            params=query_params,
            auth=(username, apiKey),
            timeout=REQUEST_TIMEOUT,
        )
        try:
            result = final_response.json()
        except ValueError:
            # Not JSON (e.g. a transient gateway page): show it, poll again.
            print(final_response.text)
            continue

        if isinstance(result, list):
            print(final_response.text)
            return result

        if isinstance(result, dict):
            if result.get("status") == "pending":
                print(result.get("message", "pending"))
            elif result.get("error") is not None:
                print(json.dumps(result, indent=4))
                return None
        # Any other answer is treated as "not ready yet"; the attempt limit
        # guarantees the loop still terminates.

    print(f"Gave up waiting for response {response_id} after {MAX_POLL_ATTEMPTS} attempts")
    return None


def extract_followers(records):
    """Return ``profile_followers`` of the first record, or None if absent."""
    if not records or not isinstance(records[0], dict):
        return None
    return records[0].get("profile_followers")


def store_follower_count(connection, company_name, followers):
    """Insert one statistics row for *company_name* and commit it."""
    current_datetime = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    cursor = connection.cursor()
    try:
        cursor.execute(
            query,
            (company_name + ' Facebook Follower', followers, current_datetime),
        )
        connection.commit()
    finally:
        cursor.close()
    print(f"{company_name} Facebook followers: {followers} inserted into the database at {current_datetime}")


def fetch_follower_count(url):
    """Scrape *url* and return its follower count, or None on any failure."""
    response_id = start_scrape(url)
    if response_id is None:
        return None
    return extract_followers(wait_for_result(response_id))


def main():
    """Scrape every configured page and store its follower count."""
    # Connect first so a database problem surfaces before any scrape is paid for.
    connection = pyodbc.connect(DB_CONNECTION_STRING)
    try:
        for url, company_name in urls.items():
            try:
                followers = fetch_follower_count(url)
            except requests.RequestException as exc:
                # One unreachable page must not cost the other pages' data.
                print(f"{company_name}: request to the scraping API failed: {exc}")
                continue

            if followers is None:
                print(f"{company_name}: no follower count available, nothing inserted")
                continue

            store_follower_count(connection, company_name, followers)
    finally:
        connection.close()


if __name__ == "__main__":
    main()
Editor is loading...