Скрипт-парсер

Скрипт для рисования границ таблиц в docx-файле с одновременным парсингом полей и дублированием их описаний в других полях
mail@pastecode.io avatar
unknown
python
a year ago
2.6 kB
1
Indexable
Never
from docx import Document
from docx.oxml import OxmlElement
from docx.oxml.ns import qn


def set_table_borders(table, size, color):
    """Give every cell of *table* a single-line border on all four sides.

    For each cell the ``w:tcBorders`` element (and, inside it, the ``w:top``,
    ``w:left``, ``w:bottom`` and ``w:right`` elements) is reused when already
    present and created otherwise; ``w:val``, ``w:sz`` and ``w:color`` are
    then overwritten on each of the four sides.

    Args:
        table: A python-docx table whose cells expose ``_element`` (the
            underlying ``w:tc`` element).
        size: Border width written to ``w:sz``. WordprocessingML measures
            this attribute in eighths of a point.
        color: Border colour written to ``w:color`` (e.g. ``"000000"``).
    """
    border_sides = ("top", "left", "bottom", "right")

    for row in table.rows:
        for cell in row.cells:
            # A cell is not required to carry <w:tcPr>; create it on demand
            # instead of failing with AttributeError on None.
            tc_pr = cell._element.get_or_add_tcPr()

            found_borders = tc_pr.xpath('./*[local-name()="tcBorders"]')
            if found_borders:
                tc_borders = found_borders[0]
            else:
                tc_borders = OxmlElement("w:tcBorders")
                tc_pr.append(tc_borders)

            for side in border_sides:
                found_side = tc_borders.xpath(f'./*[local-name()="{side}"]')
                if found_side:
                    # Update in place: re-appending an existing child would
                    # move it behind its siblings and shuffle element order.
                    border = found_side[0]
                else:
                    border = OxmlElement("w:" + side)
                    tc_borders.append(border)

                border.set(qn("w:val"), "single")
                border.set(qn("w:sz"), str(size))
                border.set(qn("w:color"), color)


def _iter_id_and_description(doc, id_col, description_col):
    """Yield ``(id_text, description_cell)`` for every usable row of *doc*.

    Rows that are too short to contain both columns, or whose ID cell is
    blank after stripping, are skipped.
    """
    required_cells = max(id_col, description_col) + 1
    for table in doc.tables:
        for row in table.rows:
            # Fetch the cell list once per row and reuse it.
            cells = row.cells
            if len(cells) < required_cells:
                # Tables with a different layout must not abort the run.
                continue
            id_text = cells[id_col].text.strip()
            if id_text:
                yield id_text, cells[description_col]


def fill_empty_descriptions(doc, id_col=0, description_col=3):
    """Copy known descriptions into rows that share an ID but lack one.

    The document is scanned twice. The first pass maps each non-empty ID to
    its non-empty description (a later row with the same ID overrides an
    earlier one). The second pass writes the mapped description into every
    row whose ID is known and whose description cell is blank.

    Args:
        doc: Object exposing ``tables``; each table exposes ``rows`` and each
            row exposes ``cells`` whose items have a ``text`` attribute.
        id_col: Zero-based, non-negative index of the ID column.
        description_col: Zero-based, non-negative index of the description
            column.

    Returns:
        None. Description cells are modified in place.
    """
    descriptions = {}

    # Pass 1: collect descriptions from all tables.
    for id_text, description_cell in _iter_id_and_description(
        doc, id_col, description_col
    ):
        description_text = description_cell.text.strip()
        if description_text:
            descriptions[id_text] = description_text

    # Pass 2: fill the blanks. The cell text is re-read here so that a cell
    # already filled earlier in this pass is not overwritten.
    for id_text, description_cell in _iter_id_and_description(
        doc, id_col, description_col
    ):
        if description_cell.text.strip():
            continue
        if id_text in descriptions:
            description_cell.text = descriptions[id_text]


def main(input_path="какое то название.docx", output_path="output.docx"):
    """Border every table of a .docx file and back-fill empty descriptions.

    Loads the document, applies a black single-line border to each cell of
    each table in ``doc.tables``, fills blank description cells from rows
    that share the same ID, and saves the result.

    Args:
        input_path: Path of the document to load.
        output_path: Path the modified document is saved to.
    """
    doc = Document(input_path)

    # ``w:sz`` is measured in eighths of a point (not twips), so 10 renders
    # as a 1.25 pt line.
    # NOTE(review): a 0.5 pt line would be sz=4 - confirm which is intended.
    border_size = 10
    border_color = "000000"  # black, as an RRGGBB hex string

    for table in doc.tables:
        set_table_borders(table, border_size, border_color)

    # Fill empty descriptions based on the IDs.
    fill_empty_descriptions(doc)

    doc.save(output_path)

# Run the conversion only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()