Untitled

mail@pastecode.io avatar
unknown
plain_text
a year ago
806 B
1
Indexable
Never
from urllib.parse import urldefrag, urljoin

import requests
from bs4 import BeautifulSoup

def crawl_webpage(url, depth, max_depth, visited=None, timeout=10):
    """Recursively crawl *url* and print the tree of pages reached from it.

    Every page that answers with HTTP 200 is printed as ``[depth] url``,
    indented by two spaces per level, and the ``<a href>`` links on it are
    then followed depth-first.

    Args:
        url: Absolute http(s) URL of the page to fetch.
        depth: Level of ``url`` in the tree; the start page is 0.
        max_depth: Deepest level that is still fetched and printed.
        visited: Set of URLs already requested during this crawl. Callers
            normally omit it; the top-level call creates a fresh set that
            is shared with every recursive call so no page is fetched twice.
        timeout: Seconds to wait for the server before giving up on a page.

    Returns:
        None. The tree is written to stdout.

    Note:
        The walk is depth-first, so a page first reached at a deep level is
        not expanded again if a shallower link to it is found later.
    """
    if depth > max_depth:
        return

    if visited is None:
        visited = set()
    if url in visited:
        return
    # Mark before fetching so a failing page is not retried from other links.
    visited.add(url)

    # Only the network call is guarded; programming errors elsewhere should
    # surface instead of being printed and ignored.
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as e:
        print(f"An error occurred: {e}")
        return

    if response.status_code != 200:
        return

    print('  ' * depth + f'[{depth}] {url}')

    # Children of a page at max_depth would be rejected by the depth check
    # above, so skip parsing the document altogether.
    if depth >= max_depth:
        return

    soup = BeautifulSoup(response.text, 'html.parser')
    for link in soup.find_all('a', href=True):
        try:
            # Resolve relative hrefs against the final (post-redirect) URL and
            # drop the fragment so "page#a" and "page#b" count as one page.
            new_url = urldefrag(urljoin(response.url, link['href'])).url
        except ValueError:
            continue  # malformed href that urllib cannot parse
        # Skips mailto:, javascript:, tel:, ftp: and similar schemes.
        if new_url.startswith(('http://', 'https://')):
            crawl_webpage(new_url, depth + 1, max_depth, visited, timeout)

# Crawl configuration; kept at module level so importers can read it.
start_url = 'http://example.com'
max_depth = 3  # Set the maximum depth of the tree

# Run the crawl only when executed as a script, so importing this module
# does not trigger network requests.
if __name__ == "__main__":
    print(f"Tree structure of {start_url}:")
    crawl_webpage(start_url, 0, max_depth)