Untitled
unknown
plain_text
a year ago
806 B
1
Indexable
Never
from typing import Dict, Optional

import requests
from bs4 import BeautifulSoup

# Seconds to wait on a server before giving up on a page. Without a timeout,
# requests waits forever and one unresponsive host stalls the whole crawl.
REQUEST_TIMEOUT = 10


def crawl_webpage(
    url: str,
    depth: int,
    max_depth: int,
    visited: Optional[Dict[str, int]] = None,
) -> None:
    """Recursively print the tree of absolute links reachable from *url*.

    A page that is fetched successfully (HTTP 200) is printed as
    ``[depth] url``, indented by one space per level. Every ``<a href>`` on
    it whose target starts with ``http`` is then crawled at ``depth + 1``.
    Relative links and other schemes are skipped.

    Args:
        url: Address of the page to fetch.
        depth: Level of *url* in the tree (0 for the start page).
        max_depth: Deepest level that is still fetched; deeper calls return
            immediately.
        visited: Maps each URL already crawled to the shallowest depth it
            was crawled at. Callers normally omit it: a fresh mapping is
            created for the top-level call and shared down the recursion.

    Errors while fetching or parsing a page are printed and that page is
    skipped; they never abort the rest of the crawl.
    """
    if depth > max_depth:
        return
    if visited is None:
        # Created per top-level call (never a mutable default argument).
        visited = {}

    # Skip a page already expanded at this depth or shallower: crawling it
    # again could not reach anything new. A page first met deep in the tree
    # IS re-crawled when later reached at a shallower depth, because more of
    # its subtree now fits within max_depth.
    seen_depth = visited.get(url)
    if seen_depth is not None and seen_depth <= depth:
        return
    visited[url] = depth

    # Best-effort boundary for a single page: any failure here is reported
    # and the crawl moves on. The recursion is kept outside the try block so
    # it only guards this page's own fetch/parse work.
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        if response.status_code != 200:
            return
        soup = BeautifulSoup(response.text, 'html.parser')
        print(' ' * depth + f'[{depth}] {url}')
        links = [link.get('href') for link in soup.find_all('a', href=True)]
    except Exception as e:
        print(f"An error occurred: {e}")
        return

    for new_url in links:
        if new_url and new_url.startswith('http'):
            crawl_webpage(new_url, depth + 1, max_depth, visited)


start_url = 'http://example.com'
max_depth = 3  # Set the maximum depth of the tree

# Run the crawl only when executed as a script, so importing this module to
# reuse crawl_webpage does not trigger network traffic.
if __name__ == "__main__":
    print(f"Tree structure of {start_url}:")
    crawl_webpage(start_url, 0, max_depth)