import requests
from googleapiclient.discovery import build
from bs4 import BeautifulSoup
import pandas as pd
# Credentials handed to google_search() further down in this script.
# Both values are placeholders: substitute real ones before running.
my_cse_id = 'Your CSE ID'    # forwarded to the API as the ``cx`` parameter
my_api_key = 'Your API Key'  # forwarded to the API as ``developerKey``
def google_search(search_term, api_key, cse_id, **kwargs):
    """Run one Custom Search query and return the API response unchanged.

    Args:
        search_term: Query text, sent as the ``q`` parameter.
        api_key: Developer key used to build the ``customsearch`` v1 client.
        cse_id: Search engine identifier, sent as the ``cx`` parameter.
        **kwargs: Extra keyword arguments forwarded verbatim to
            ``cse().list(...)``.

    Returns:
        Whatever ``execute()`` returns for the request; no post-processing
        is applied here.
    """
    client = build("customsearch", "v1", developerKey=api_key)
    request = client.cse().list(q=search_term, cx=cse_id, **kwargs)
    return request.execute()
# Search phrases to look up; each one is sent to google_search() in turn.
KEYWORDS = ["search engine optimization"]

# Column order of the exported sheet; every collected row follows it.
COLUMNS = ['Keywords', 'URLs', 'Headings', 'Contents']

# Tags whose text is harvested from each result page.
HEADING_TAGS = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']

# Seconds to wait for a page. requests applies no timeout by default, so a
# single unresponsive host would otherwise stall the whole run.
REQUEST_TIMEOUT = 10


def _extract_headings(url):
    """Download *url* and return ``(TAG, text)`` pairs for its headings.

    Args:
        url: Address of the page to fetch.

    Returns:
        A list of ``(tag_name_upper, heading_text)`` tuples in document
        order; empty when the page contains no h1-h6 elements.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status (4xx/5xx), so that error pages are not
            mistaken for real content.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')
    return [
        (heading.name.upper(), heading.get_text())
        for heading in soup.find_all(HEADING_TAGS)
    ]


# Rows are gathered in a plain list and turned into a DataFrame once at the
# end; growing a DataFrame one row at a time is needlessly slow.
rows = []
for keyword in KEYWORDS:
    try:
        result = google_search(keyword, my_api_key, my_cse_id)
        urls = [item['link'] for item in result.get('items', [])]
    except Exception as exc:
        # Best effort: a failed search skips this keyword, but the cause is
        # reported instead of being hidden. ``Exception`` (not a bare
        # ``except``) lets Ctrl-C and SystemExit propagate.
        print(f"No Data for {keyword!r}: {exc}")
        continue

    for url in urls:
        try:
            headings = _extract_headings(url)
        except Exception:
            # Keep a placeholder row so the unreachable or unparseable URL
            # is still visible in the output.
            rows.append([keyword, url, "No Data", "No Data"])
            continue
        rows.extend([keyword, url, tag, text] for tag, text in headings)

df = pd.DataFrame(rows, columns=COLUMNS)

filename = 'Result.xlsx'
df.to_excel(filename, index=False)
print("Done")