Untitled
unknown
plain_text
2 years ago
806 B
11
Indexable
import requests
from bs4 import BeautifulSoup
def crawl_webpage(url, depth, max_depth, visited=None, timeout=10):
    """Recursively crawl *url* and print the tree of absolute links found.

    Each successfully fetched page (HTTP 200) is printed as ``[depth] url``,
    indented by one space per depth level, and every ``<a href>`` whose
    target starts with ``http`` is then crawled at ``depth + 1``.

    Args:
        url: Absolute URL of the page to fetch.
        depth: Depth of *url* in the crawl tree (the root is 0).
        max_depth: Deepest level that is still fetched; pages deeper than
            this are skipped.
        visited: Set of URLs already requested during this crawl. It is
            created on the first call and shared with every recursive call
            so that pages linking to each other are fetched only once.
            Because the crawl is depth-first, a URL is listed only at the
            position where it was first encountered.
        timeout: Seconds to wait for the server before giving up on a
            request, so one unresponsive host cannot stall the whole crawl.

    Returns:
        None. Results are written to standard output.
    """
    if depth > max_depth:
        return

    if visited is None:
        visited = set()
    if url in visited:
        return
    # Record the URL before fetching so a failing page is not retried.
    visited.add(url)

    try:
        response = requests.get(url, timeout=timeout)
    except (requests.RequestException, ValueError) as e:
        # Best effort: report the failed fetch and carry on with the other
        # links. ValueError covers malformed URLs that are rejected before
        # any request is sent.
        print(f"An error occurred: {e}")
        return

    # Only pages that were served successfully are listed and expanded.
    if response.status_code != 200:
        return

    soup = BeautifulSoup(response.text, 'html.parser')
    print(' ' * depth + f'[{depth}] {url}')

    for link in soup.find_all('a', href=True):
        new_url = link['href']
        # Relative links, fragments, mailto: and similar are not followed.
        if new_url.startswith('http'):
            crawl_webpage(new_url, depth + 1, max_depth, visited, timeout)
# Crawl settings: how many link levels to follow, and the page to start from.
max_depth = 3 # Set the maximum depth of the tree
start_url = 'http://example.com'

# Print a heading, then walk the link tree with the start page at depth 0.
print(f"Tree structure of {start_url}:")
crawl_webpage(url=start_url, depth=0, max_depth=max_depth)
Editor is loading...