Unstructured text file read in Python

 avatar
unknown
python
2 years ago
3.7 kB
11
Indexable
### INPUT 1 (with # EXTRA ROWS 1  2 3 4/ UNEVEN COLUMN):



# Frame_Number = 50
0.AAAAAAAA
1.AAAAAAAA
2.AAAAAAAA
3.AAAAAAAA
4.AAAAAAAA
5.AAAAAAAA
6.AAAAAAAA
7.AAAAAAAA
8.AAAAAAAA
9.AAAAAAAA
# Frame_Number = 51
0.BBBBBBBB
1.BBBBBBBB
2.BBBBBBBB
3.BBBBBBBB
4.BBBBBBBB
5.BBBBBBBB
6.BBBBBBBB
7.BBBBBBBB
8.BBBBBBBB
9.BBBBBBBB
# Frame_Number = 52
0.CCCCCCCC
1.CCCCCCCC
2.CCCCCCCC
3.CCCCCCCC
4.CCCCCCCC
5.CCCCCCCC
6.CCCCCCCC
7.CCCCCCCC
8.CCCCCCCC
9.CCCCCCCC
A.00000000  # EXTRA ROW 1 / UNEVEN COLUMN
B.11111111  # EXTRA ROW 2 / UNEVEN COLUMN
# Frame_Number = 53
0.DDDDDDDD
1.DDDDDDDD
2.DDDDDDDD
3.DDDDDDDD
4.DDDDDDDD
5.DDDDDDDD
6.DDDDDDDD
7.DDDDDDDD
8.DDDDDDDD
9.DDDDDDDD
A.DDDDDDDD  # EXTRA ROW 3 / UNEVEN COLUMN
B.DDDDDDDD  # EXTRA ROW 4 / UNEVEN COLUMN
# Frame_Number = 54
0.EEEEEEEE
1.EEEEEEEE
2.EEEEEEEE
3.EEEEEEEE
4.EEEEEEEE
5.EEEEEEEE
6.EEEEEEEE
7.EEEEEEEE
8.EEEEEEEE
9.EEEEEEEE



### INPUT 2:

# TITLE_RAND = 2
var1: my text zero.
var2: my text one.
var3: my text two.
var4: my text three.
tester1
tester2
# TITLE_RAND = 3
var5: my text four.
var6: my text 55.
var7: my text 6.
var8: my text 7.
tester24
tester34


### PRINTS AND ERROR FOR INPUT 2
len(frame_vals) 0
len(frames) 0
Traceback (most recent call last):
  File "C:\mytest\printtest.py", line 54, in <module>
    convert_csv("teste.txt")
  File "C:\mytest\printtest.py", line 34, in convert_csv
    chunksize = len(frame_vals) // len(frames)
ZeroDivisionError: integer division or modulo by zero
[Finished in 111ms]

# Divide By Zero error: https://i.imgur.com/NKtTIRh.png

# Extra rows /Uneven Columns: https://i.imgur.com/eHgkSh6.png

# Basic example working: https://i.imgur.com/g1mid5g.png



### SCRIPT (with added prints statements)
        # print("len(frame_vals)", len(frame_vals))
        # print("len(frames)", len(frames))
        # print("chunksize", chunksize)



import re
import csv


def convert_csv(filenm):
    "Produces structured data by converting to CSV file"

    # https://stackoverflow.com/questions/3348460/csv-file-written-with-python-has-blank-lines-between-each-row
    with open(filenm, "r") as fin, open("out.txt", "w", newline="") as csvfile:
        csv_writer = csv.writer(
            csvfile, delimiter=" ", quotechar="|", quoting=csv.QUOTE_MINIMAL
        )
        data = fin.readlines()

        # Regex attern for # followed by non-digits followed by = followed by number (integer or float)
        pattern = re.compile(r"# (\D+) = (\d+(?:\.\d+)?)")
        number = re.compile(r"(\d+(?:\.\d+)?)")

        # Header
        frames = [
            f"Frame_{m.group(2)}"
            for line in data
            if (m := pattern.match(line)) and m.group(1) == "Frame_Number"
        ]
        # lines with numbers only
        frame_vals = [
            l for line in data if ((l := line.rstrip()) and (m := number.match(l)))
        ]

        print("len(frame_vals)", len(frame_vals))
        print("len(frames)", len(frames))

        # Dividing frame data into chunks by the number of frames by column
        chunksize = len(frame_vals) // len(frames)

        print("chunksize", chunksize)

        frame_data = list(
            zip(
                *[
                    frame_vals[i : i + chunksize]
                    for i in range(0, len(frame_vals), chunksize)
                ]
            )
        )

        csv_writer.writerow(frames)  # Write header

        # write other data
        for row in frame_data:
            csv_writer.writerow(row)


convert_csv("teste.txt")
Editor is loading...