# Untitled
# unknown
# plain_text
# a year ago
# 1.1 kB
# 11
# Indexable
from resiliparse.parse.html import HTMLTree, Element
def replace_img_with_span(html_doc: str) -> str:
    """Replace every ``<img>`` that has alt text with a ``<span>`` holding that text.

    Images whose ``alt`` attribute is missing or empty are left untouched.

    Args:
        html_doc: HTML source to parse.

    Returns:
        The serialized HTML of the modified document.
    """
    # Parse the HTML document.
    tree = HTMLTree.parse(html_doc)

    # Snapshot the matches first so that swapping nodes inside the loop
    # cannot disturb the iteration.
    img_elements = list(tree.body.get_elements_by_tag_name("img"))

    for img in img_elements:
        # getattr() yields None when the attribute is absent; an empty alt
        # is skipped as well because it carries no text to show.
        alt_text = img.getattr("alt")
        if not alt_text:
            continue

        # New nodes must be created through the owning tree.
        span = tree.create_element("span")
        span.text = alt_text  # assigned as text content, not parsed as markup

        # Replacement is performed by the parent: replace_child(new, old).
        img.parent.replace_child(span, img)

    # Serialize the whole document, starting at the document root node.
    return tree.document.html
# Example HTML document: two images with alt text and one without.
html_doc = """
<!DOCTYPE html>
<html>
<head>
<title>Example</title>
</head>
<body>
<img src="image1.jpg" alt="Image 1">
<p>Some text</p>
<img src="image2.jpg">
<img src="image3.jpg" alt="Image 3">
</body>
</html>
"""
# Run the replacement on the example document.
updated_html = replace_img_with_span(html_doc)
# Print the resulting HTML.
print(updated_html)
# Leave a Comment