Untitled
unknown
plain_text
a year ago
1.5 kB
2
Indexable
Never
import requests
from googleapiclient.discovery import build
from bs4 import BeautifulSoup
import pandas as pd

# Script: for every keyword, query a Google Custom Search engine, download
# each result page, and record every heading (h1-h6) found on it. The rows are
# written to an Excel workbook at the end.

my_api_key = 'Your API Key'
my_cse_id = "Your CSE ID"

KEYWORDS = ["search engine optimization"]

COLUMNS = ['Keywords', 'URLs', 'Headings', 'Contents']
HEADING_TAGS = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']
NO_DATA = "No Data"
# Seconds to wait for a page before giving up; without a timeout a single
# unresponsive server would block the whole run indefinitely.
REQUEST_TIMEOUT = 10


def google_search(search_term, api_key, cse_id, **kwargs):
    """Run one query against the "customsearch" v1 service and return the response.

    Args:
        search_term: Query string, passed as ``q``.
        api_key: Developer key used to build the service client.
        cse_id: Search engine identifier, passed as ``cx``.
        **kwargs: Extra parameters forwarded unchanged to ``cse().list()``.

    Returns:
        Whatever ``execute()`` returns for the request. The script below reads
        its optional ``'items'`` list and each item's ``'link'``.
    """
    service = build("customsearch", "v1", developerKey=api_key)
    res = service.cse().list(q=search_term, cx=cse_id, **kwargs).execute()
    return res


def _heading_rows(keyword, url):
    """Download *url* and return one ``[keyword, url, tag, text]`` row per heading.

    Returns an empty list when the page contains no headings.

    Raises:
        requests.RequestException: On connection failure, timeout, or an HTTP
            error status.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    # An error page (403/404/500...) is not the content we searched for, so
    # treat it as a failure instead of harvesting its headings.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')
    return [
        [keyword, url, heading.name.upper(), heading.get_text()]
        for heading in soup.find_all(HEADING_TAGS)
    ]


# Rows are collected in a plain list and turned into a DataFrame once;
# appending to the frame row by row gets slower as the frame grows.
rows = []
for keyword in KEYWORDS:
    try:
        result = google_search(keyword, my_api_key, my_cse_id)
        urls = [item['link'] for item in result.get('items', [])]
    except Exception:
        # Best-effort: a failed search skips this keyword only. `Exception`
        # (not a bare except) so Ctrl-C and SystemExit still stop the script.
        print("No Data")
        continue

    for url in urls:
        try:
            rows.extend(_heading_rows(keyword, url))
        except Exception:
            # Best-effort per page: keep a placeholder row so the URL is
            # still visible in the report.
            rows.append([keyword, url, NO_DATA, NO_DATA])

df = pd.DataFrame(rows, columns=COLUMNS)

filename = 'Result.xlsx'
df.to_excel(filename, index=False)
print("Done")