from bs4 import BeautifulSoup
from urllib.request import Request, urlopen
from requests_html import HTMLSession
import requests
import re
from urllib.request import build_opener, HTTPCookieProcessor
# Download the university home page and collect the href of every <a> tag.
hdr = {'User-Agent': 'Mozilla/5.0'}  # User-Agent header attached to the request
req = Request("https://www.deu.edu.tr//", headers=hdr)
# Parse inside the `with` so the HTTP response is closed once it has been read.
with urlopen(req) as html_page:
    soup = BeautifulSoup(html_page, "lxml")
list_words = []  # word accumulator; filled by the crawl loop further down
# Anchors without an href yield None here; they are kept so that list
# positions still correspond to the order of anchors on the page.
links = [link.get('href') for link in soup.find_all('a')]
print(links)
# Guard the preview: a page without any anchors would raise IndexError.
if links:
    print(links[0])
# Visit the collected links and append every whitespace-separated word found
# in each page's <body> to list_words.
opener = build_opener(HTTPCookieProcessor())  # opener with cookie handling enabled
flag = True  # not read in this section; kept in case later code relies on it
max_links = 360  # upper bound on how many collected links are visited
# Slicing (instead of indexing with range(360)) keeps the same cap but no
# longer raises IndexError when fewer than max_links links were collected.
for i, href in enumerate(links[:max_links]):
    print(i)
    if href == '#':
        # Placeholder anchor, nothing to fetch.
        continue
    try:
        req = Request(href, headers=hdr)
        # Context manager closes the connection even if read() fails.
        with opener.open(req, timeout=30) as response:
            content = response.read()
        soup = BeautifulSoup(content, "lxml")
        tag = soup.body
        if tag is None:
            # Document has no <body>, so there is no text to harvest.
            print("Kelime yok")
            continue
        for string in tag.strings:
            list_words.extend(string.split())
    except Exception:
        # Best-effort crawl: missing/relative/unsupported hrefs, network
        # errors and timeouts all just skip the link. Unlike a bare
        # `except:`, this lets KeyboardInterrupt/SystemExit propagate.
        print("Kelime yok")