# Untitled
#
# avatar
# unknown
# plain_text
# 14 days ago
# 1.1 kB
# 7
# Indexable
from resiliparse.parse.html import HTMLTree

def replace_img_with_span(html_doc: str) -> str:
    """Replace each ``<img>`` that has alt text with a ``<span>`` of that text.

    The document is parsed with Resiliparse, every ``<img>`` element under
    ``<body>`` is inspected, and those carrying a non-empty ``alt`` attribute
    are swapped for a ``<span>`` whose only child is a text node holding the
    alt text. Images with a missing or empty ``alt`` are left untouched.

    Args:
        html_doc: HTML markup to process.

    Returns:
        The serialized document after the replacements. The markup is
        re-serialized from the parsed tree, so it is not guaranteed to be
        byte-identical to the input outside the replaced elements.
    """
    tree = HTMLTree.parse(html_doc)

    # Snapshot the matches first so that swapping nodes in the tree cannot
    # disturb the iteration.
    images = list(tree.body.get_elements_by_tag_name("img"))

    for img in images:
        alt_text = img.getattr("alt")

        # Skip images whose alt attribute is missing or empty.
        if not alt_text:
            continue

        # New nodes are created through the tree that will own them.
        span = tree.create_element("span")
        span.append_child(tree.create_text_node(alt_text))

        # Replacement is an operation on the parent: (new_child, old_child).
        img.parent.replace_child(span, img)

    return tree.document.html

# Sample HTML document: three <img> elements, of which the second one
# (image2.jpg) has no alt attribute.
html_doc = """
<!DOCTYPE html>
<html>
<head>
    <title>Example</title>
</head>
<body>
    <img src="image1.jpg" alt="Image 1">
    <p>Some text</p>
    <img src="image2.jpg">
    <img src="image3.jpg" alt="Image 3">
</body>
</html>
"""

# Run the replacement on the sample document.
updated_html = replace_img_with_span(html_doc)

# Print the resulting markup.
print(updated_html)
# Leave a Comment