# Untitled
#
# Paste-site metadata (not part of the script):
# avatar unknown, plain_text, 2 years ago, 3.0 kB, 5, Indexable
import datetime
import json
import os
from time import sleep

import pyodbc
import requests

# ScrapingBot endpoints: the first accepts a scrape job, the second is polled
# for the finished result (it ends in "?" because the caller appends the
# query parameters directly).
# NOTE(review): both endpoints use plain http:// while the caller sends
# credentials with HTTP basic auth — confirm whether https:// is available.
_API_BASE = "http://api.scraping-bot.io/scrape/"
apiEndPoint = _API_BASE + "data-scraper"
apiEndPointResponse = _API_BASE + "data-scraper-response?"

# Facebook pages to scrape, keyed by page URL. Each value is the short company
# label that prefixes the statistic name written to the database.
urls = {
    'https://www.facebook.com/OberrheinHandelsUnion/': 'OHU',
    'https://www.facebook.com/BLSIffezheim/': 'BLS',
}

# Connect to the database.
#
# SECURITY: the server address and login used to be fixed in the source. Each
# setting can now be supplied through an environment variable; the literal
# fallbacks only keep existing deployments working unchanged.
# TODO: rotate the password and remove the hard-coded fallback values.
# NOTE(review): a value containing ';' presumably needs to be wrapped in
# braces to survive connection-string parsing — confirm before using one.
_db_settings = {
    'DRIVER': '{ODBC Driver 17 for SQL Server}',
    'SERVER': os.environ.get('STATISTIK_DB_SERVER', '192.168.25.172,1433'),
    'DATABASE': os.environ.get('STATISTIK_DB_NAME', 'OHU'),
    'UID': os.environ.get('STATISTIK_DB_USER', 'portaluser'),
    'PWD': os.environ.get('STATISTIK_DB_PASSWORD', 'F!5&P$VemTK42D'),
}

# Assemble "KEY=value;" pairs in the same order as the original literal, so
# the resulting connection string is identical when no variable is set.
connection = pyodbc.connect(
    ''.join(f'{key}={value};' for key, value in _db_settings.items())
)

# Define the INSERT query. Values are bound through the "?" placeholders, so
# nothing is interpolated into the SQL text.
# NOTE(review): the four-part table name presumably goes through a linked
# server called W2019PortalDB — confirm against the database setup.
query = "INSERT INTO [W2019PortalDB].[intranetstatistik].dbo.statistik (Art, Anzahl, Datum) VALUES (?, ?, ?)"

# Define the username, API key and scraper.
#
# SECURITY: the API credentials used to be fixed in the source. They can now
# be supplied through environment variables; the literal fallbacks only keep
# existing deployments working unchanged.
# TODO: rotate the API key and remove the hard-coded fallback values.
username = os.environ.get('SCRAPINGBOT_USERNAME', 'ceased')
apiKey = os.environ.get('SCRAPINGBOT_API_KEY', 'W2vy3Mm6OvA0k5VLUkNYhVsWs')
scraper = 'facebookProfile'

# Scrape every configured page and record its follower count.
#
# Relies on names defined earlier in this file: urls, apiEndPoint,
# apiEndPointResponse, username, apiKey, scraper, query and connection.

# Without a timeout a stalled connection would block the script indefinitely.
REQUEST_TIMEOUT_SECONDS = 60
# Sleep 5s between polls: social-media scraping can take quite long to
# complete, and the API returns an error when it is called too quickly.
POLL_INTERVAL_SECONDS = 5
# Upper bound on polling (120 * 5s = 10 minutes) so that an unexpected payload
# can no longer keep the loop running forever.
MAX_POLL_ATTEMPTS = 120


def _start_scrape(url):
    """Submit *url* to the scraper API.

    Returns the ``responseId`` of the started job, or ``None`` when the API
    answers with a non-200 status or a body without a ``responseId``; the raw
    response text is printed in that case.

    Raises:
        requests.RequestException: on connection problems or a timeout.
    """
    response = requests.post(
        apiEndPoint,
        data=json.dumps({"url": url, "scraper": scraper}),
        auth=(username, apiKey),
        headers={'Content-Type': "application/json"},
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
    if response.status_code != 200:
        print(response.text)
        return None

    try:
        # Parse the body once instead of once per use.
        body = response.json()
        response_id = body["responseId"]
    except (ValueError, KeyError, TypeError):
        # Not JSON, or JSON without a responseId.
        print(response.text)
        return None

    print(body)
    print(response_id)
    return response_id


def _poll_scrape_result(response_id):
    """Poll the response endpoint until the job identified by *response_id* ends.

    Returns the list payload once the scrape is complete. Returns ``None``
    when the API reports an error, sends a body that is not JSON, or does not
    deliver a result within ``MAX_POLL_ATTEMPTS`` polls.

    Raises:
        requests.RequestException: on connection problems or a timeout.
    """
    for _ in range(MAX_POLL_ATTEMPTS):
        sleep(POLL_INTERVAL_SECONDS)
        poll_response = requests.get(
            apiEndPointResponse,
            # Let requests build and escape the query string.
            params={"scraper": scraper, "responseId": response_id},
            auth=(username, apiKey),
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
        try:
            result = poll_response.json()
        except ValueError:
            print(poll_response.text)
            return None

        if isinstance(result, list):
            # A list payload means the scrape has finished.
            print(poll_response.text)
            return result

        if isinstance(result, dict):
            if result.get("status") == "pending":
                print(result.get("message"))
                continue
            if result.get("error") is not None:
                print(json.dumps(result, indent=4))
                return None
        # Any other payload: keep polling, bounded by MAX_POLL_ATTEMPTS.

    print(f"Gave up waiting for response {response_id} after {MAX_POLL_ATTEMPTS} polls")
    return None


def _store_followers(company_name, followers):
    """Insert one follower-count row for *company_name* and commit it."""
    current_datetime = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    cursor = connection.cursor()
    try:
        cursor.execute(query, (company_name + ' Facebook Follower', followers, current_datetime))
        cursor.commit()
    finally:
        # Release the cursor even when the insert fails.
        cursor.close()
    print(f"{company_name} Facebook followers: {followers} inserted into the database at {current_datetime}")


# Loop over the urls
for url, company_name in urls.items():
    try:
        response_id = _start_scrape(url)
        if response_id is None:
            continue
        result = _poll_scrape_result(response_id)
    except requests.RequestException as exc:
        # A network failure for one page must not stop the remaining pages.
        print(f"Request for {url} failed: {exc}")
        continue

    if result is None:
        continue

    # Get the follower count
    try:
        followers = result[0]["profile_followers"]
    except (IndexError, KeyError, TypeError):
        print(f"No follower count in scrape result for {url}")
        continue

    # Insert the data into the database
    _store_followers(company_name, followers)
# Editor is loading...