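# Paste-site metadata (not part of the script):
#   title: Untitled
#   author: unknown
#   syntax: plain_text
#   age: 3 years ago
#   size: 3.0 kB
#   views: 9
#   Indexable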
import requests
import json
from time import sleep
import datetime
import pyodbc
# ScrapingBot account and scraper selection.
# NOTE(review): the API key and the database password below are hard-coded in
# source; consider loading them from the environment or a secrets store.
scraper = 'facebookProfile'
username = 'ceased'
apiKey = 'W2vy3Mm6OvA0k5VLUkNYhVsWs'

# Endpoint that starts a scrape job, and the endpoint that is polled for its result.
apiEndPoint = "http://api.scraping-bot.io/scrape/data-scraper"
apiEndPointResponse = "http://api.scraping-bot.io/scrape/data-scraper-response?"

# Facebook page URL -> short company label used when storing the statistic.
urls = dict([
    ('https://www.facebook.com/OberrheinHandelsUnion/', 'OHU'),
    ('https://www.facebook.com/BLSIffezheim/', 'BLS'),
])

# Parameterized INSERT; the three placeholders are bound to (Art, Anzahl, Datum).
query = "INSERT INTO [W2019PortalDB].[intranetstatistik].dbo.statistik (Art, Anzahl, Datum) VALUES (?, ?, ?)"

# Open the SQL Server connection once; it is reused for every insert.
_connection_string = "".join((
    'DRIVER={ODBC Driver 17 for SQL Server};',
    'SERVER=192.168.25.172,1433;',
    'DATABASE=OHU;',
    'UID=portaluser;',
    'PWD=F!5&P$VemTK42D;',
))
connection = pyodbc.connect(_connection_string)
# Limits for talking to the scraping API. Without them a job that never leaves
# the "pending" state, or a stalled HTTP connection, would hang the script.
POLL_INTERVAL_SECONDS = 5
MAX_POLL_ATTEMPTS = 60
REQUEST_TIMEOUT_SECONDS = 30


def _wait_for_scrape_result(response_id):
    """Poll the response endpoint until the scrape job finishes.

    Args:
        response_id: the "responseId" value returned when the job was started.

    Returns:
        The list payload returned by the API once the job is done, or None
        when the API reports an error or the job is still not finished after
        MAX_POLL_ATTEMPTS polls.

    Raises:
        requests.RequestException: on network failures or timeouts.
        ValueError: when a reply body is not valid JSON.
    """
    poll_url = f"{apiEndPointResponse}scraper={scraper}&responseId={response_id}"
    for _ in range(MAX_POLL_ATTEMPTS):
        # Sleep between polls: social-media scraping can take quite long to
        # complete, and the API returns an error when it is called too quickly.
        sleep(POLL_INTERVAL_SECONDS)
        final_response = requests.request(
            "GET",
            poll_url,
            auth=(username, apiKey),
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
        result = final_response.json()
        if isinstance(result, list):
            print(final_response.text)
            return result
        if isinstance(result, dict):
            if result.get("status") == "pending":
                print(result.get("message", "pending"))
            elif result.get("error") is not None:
                print(json.dumps(result, indent=4))
                return None
        # Any other reply is treated as "not ready yet" and polled again,
        # but only until the attempt budget is used up.
    print(f"Gave up waiting for scrape result {response_id} after {MAX_POLL_ATTEMPTS} attempts")
    return None


def _store_follower_count(company_name, followers):
    """Insert one follower-count row, stamped with the current local time."""
    current_datetime = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    cursor = connection.cursor()
    try:
        cursor.execute(query, (company_name + ' Facebook Follower', followers, current_datetime))
        connection.commit()
    finally:
        # Release the cursor even when the insert fails.
        cursor.close()
    print(f"{company_name} Facebook followers: {followers} inserted into the database at {current_datetime}")


# Loop over the urls: start a scrape per page, wait for it, store the result.
for url, company_name in urls.items():
    payload = json.dumps({"url": url, "scraper": scraper})
    headers = {
        'Content-Type': "application/json"
    }
    try:
        response = requests.request(
            "POST",
            apiEndPoint,
            data=payload,
            auth=(username, apiKey),
            headers=headers,
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
        if response.status_code != 200:
            print(response.text)
            continue
        # Parse the body once instead of once per use.
        start_info = response.json()
        print(start_info)
        response_id = start_info["responseId"]
        print(response_id)
        result = _wait_for_scrape_result(response_id)
    except (requests.RequestException, ValueError) as exc:
        # A failure for one page must not prevent the remaining pages from running.
        print(f"Scraping {url} failed: {exc}")
        continue

    if not result:
        # None: the reason was already printed by the poller.
        # Empty list: the job finished but returned no profile to read.
        if result is not None:
            print(f"No profile data returned for {url}")
        continue

    # Get the follower count from the first profile in the result.
    followers = result[0]["profile_followers"]
    _store_follower_count(company_name, followers)
# Editor is loading... (paste-site page residue, not part of the script)