# Untitled
#
# mail@pastecode.io avatar
# unknown
# plain_text
# a year ago
# 1.5 kB
# 2
# Indexable
# Never
import requests
from googleapiclient.discovery import build
from bs4 import BeautifulSoup
import pandas as pd

# Placeholder credentials handed to google_search() below: the developer key
# and the custom search engine id. Replace both before running the script.
my_api_key = "Your API Key"
my_cse_id = "Your CSE ID"

def google_search(search_term, api_key, cse_id, **kwargs):
    """Run one Custom Search query and return the executed response.

    Args:
        search_term: Query text, sent as the ``q`` parameter.
        api_key: Developer key used to build the ``customsearch`` v1 service.
        cse_id: Search engine id, sent as the ``cx`` parameter.
        **kwargs: Extra keyword arguments forwarded unchanged to
            ``cse().list(...)``.

    Returns:
        Whatever ``execute()`` returns for the request; the caller in this
        file treats it as a mapping that may contain an ``'items'`` key.
    """
    client = build("customsearch", "v1", developerKey=api_key)
    request = client.cse().list(q=search_term, cx=cse_id, **kwargs)
    return request.execute()

# Search terms to look up; each one is sent to google_search() in turn.
KEYWORDS = ["search engine optimization"]

# Column order of the exported sheet; every collected row follows this layout.
COLUMNS = ['Keywords', 'URLs', 'Headings', 'Contents']

# Tags treated as headings when scanning a result page.
HEADING_TAGS = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']

# Seconds to wait on a result page before giving up. Without a timeout,
# requests.get() can block indefinitely on an unresponsive host.
REQUEST_TIMEOUT = 10

# Rows are gathered in a plain list and turned into a DataFrame once at the
# end; growing a DataFrame one row at a time is slow.
rows = []

for keyword in KEYWORDS:
    try:
        result = google_search(keyword, my_api_key, my_cse_id)
        urls = [item['link'] for item in result.get('items', [])]
    except Exception as exc:
        # Exception (not a bare except) so Ctrl-C and SystemExit still stop
        # the script; the error is printed so a failed search is diagnosable.
        print(f"No Data for {keyword!r}: {exc}")
        continue

    for url in urls:
        try:
            response = requests.get(url, timeout=REQUEST_TIMEOUT)
            soup = BeautifulSoup(response.content, 'html.parser')
            # One row per heading: tag name upper-cased (e.g. "H2") and its text.
            page_rows = [
                [keyword, url, heading.name.upper(), heading.get_text()]
                for heading in soup.find_all(HEADING_TAGS)
            ]
        except Exception:
            # Best effort: a page that cannot be fetched or parsed is still
            # recorded, with placeholder values, so the URL is not lost.
            page_rows = [[keyword, url, "No Data", "No Data"]]
        rows.extend(page_rows)

df = pd.DataFrame(rows, columns=COLUMNS)

filename = 'Result.xlsx'
df.to_excel(filename, index=False)
print("Done")