Untitled
unknown
plain_text
a year ago
1.5 kB
1
Indexable
Never
"""Export entries scraped from an HTML file into a Word (.docx) table.

Every ``<li>`` element carrying an ``href`` attribute is treated as one entry.
The ``<h2>`` siblings that follow it and whose text contains "Info" or
"Solution" supply the remaining columns, each taken from the first ``<p>``
found after that heading.
"""

from bs4 import BeautifulSoup
import docx
from docx.shared import Inches

HTML_PATH = 'your_html_file.html'
OUTPUT_PATH = 'output_table.docx'

# (heading, width in inches) for each table column, in the same order as the
# fields of a row built by _extract_rows(). Adjust the widths to fit the page.
COLUMNS = (
    ("ID", 1),
    ("Problem", 2),
    ("Info", 3),
    ("Solution", 3),
)


def _section_text(heading):
    """Return the text of the first ``<p>`` after *heading*, or "" if none."""
    paragraph = heading.find_next('p')
    # find_next() returns None when no <p> follows; avoid an AttributeError.
    return paragraph.get_text() if paragraph is not None else ""


def _extract_rows(soup):
    """Collect one ``[id, problem, info, solution]`` row per ``<li href>``.

    Args:
        soup: Parsed document (a ``BeautifulSoup`` object).

    Returns:
        A list of four-item lists of strings; ``info`` and ``solution`` are
        empty strings when no matching heading is found for an entry.
    """
    rows = []
    for entry in soup.find_all('li', href=True):
        # Strip and re-add the "idm" marker exactly as the original script did.
        id_value = entry['href'].replace('idm', '')
        problem = entry.get_text()
        info = ""
        solution = ""
        for tag in entry.find_next_siblings():
            # Stop at the next entry so that its sections are not attributed
            # to the current one.
            if tag.name == 'li' and tag.get('href') is not None:
                break
            if tag.name != 'h2':
                continue
            heading_text = tag.get_text()
            if "Info" in heading_text:
                info = _section_text(tag)
            elif "Solution" in heading_text:
                solution = _section_text(tag)
        rows.append([f"idm{id_value}", problem, info, solution])
    return rows


def _write_table(rows, output_path):
    """Write *rows* into a new Word document as a table with a header row.

    Args:
        rows: Iterable of rows, each with one string per entry in COLUMNS.
        output_path: Destination path of the ``.docx`` file.
    """
    doc = docx.Document()
    # rows=1 creates the header row; data rows are appended below.
    table = doc.add_table(rows=1, cols=len(COLUMNS))
    table.autofit = False
    # NOTE(review): `allow_autofit` does not appear to be a documented
    # python-docx Table attribute; kept from the original script, presumably
    # a no-op. Confirm and remove.
    table.allow_autofit = False

    header_cells = table.rows[0].cells
    for index, (title, width) in enumerate(COLUMNS):
        table.columns[index].width = Inches(width)
        header_cells[index].text = title

    for row_data in rows:
        cells = table.add_row().cells
        for cell, value in zip(cells, row_data):
            cell.text = value

    doc.save(output_path)


def main():
    """Read HTML_PATH, extract the entries and save them to OUTPUT_PATH."""
    with open(HTML_PATH, 'r', encoding='utf-8') as file:
        soup = BeautifulSoup(file.read(), 'html.parser')
    _write_table(_extract_rows(soup), OUTPUT_PATH)


if __name__ == "__main__":
    main()