Unstructured text file read in Python
unknown
python
2 years ago
3.7 kB
11
Indexable
### INPUT 1 (with # EXTRA ROWS 1 2 3 4/ UNEVEN COLUMN): # Frame_Number = 50 0.AAAAAAAA 1.AAAAAAAA 2.AAAAAAAA 3.AAAAAAAA 4.AAAAAAAA 5.AAAAAAAA 6.AAAAAAAA 7.AAAAAAAA 8.AAAAAAAA 9.AAAAAAAA # Frame_Number = 51 0.BBBBBBBB 1.BBBBBBBB 2.BBBBBBBB 3.BBBBBBBB 4.BBBBBBBB 5.BBBBBBBB 6.BBBBBBBB 7.BBBBBBBB 8.BBBBBBBB 9.BBBBBBBB # Frame_Number = 52 0.CCCCCCCC 1.CCCCCCCC 2.CCCCCCCC 3.CCCCCCCC 4.CCCCCCCC 5.CCCCCCCC 6.CCCCCCCC 7.CCCCCCCC 8.CCCCCCCC 9.CCCCCCCC A.00000000 # EXTRA ROW 1 / UNEVEN COLUMN B.11111111 # EXTRA ROW 2 / UNEVEN COLUMN # Frame_Number = 53 0.DDDDDDDD 1.DDDDDDDD 2.DDDDDDDD 3.DDDDDDDD 4.DDDDDDDD 5.DDDDDDDD 6.DDDDDDDD 7.DDDDDDDD 8.DDDDDDDD 9.DDDDDDDD A.DDDDDDDD # EXTRA ROW 3 / UNEVEN COLUMN B.DDDDDDDD # EXTRA ROW 4 / UNEVEN COLUMN # Frame_Number = 54 0.EEEEEEEE 1.EEEEEEEE 2.EEEEEEEE 3.EEEEEEEE 4.EEEEEEEE 5.EEEEEEEE 6.EEEEEEEE 7.EEEEEEEE 8.EEEEEEEE 9.EEEEEEEE ### INPUT 2: # TITLE_RAND = 2 var1: my text zero. var2: my text one. var3: my text two. var4: my text three. tester1 tester2 # TITLE_RAND = 3 var5: my text four. var6: my text 55. var7: my text 6. var8: my text 7. 
# tester24 tester34   (tail of the INPUT 2 sample data)
#
# ### PRINTS AND ERROR FOR INPUT 2 (output of the original script)
# len(frame_vals) 0
# len(frames) 0
# Traceback (most recent call last):
#   File "C:\mytest\printtest.py", line 54, in <module>
#     convert_csv("teste.txt")
#   File "C:\mytest\printtest.py", line 34, in convert_csv
#     chunksize = len(frame_vals) // len(frames)
# ZeroDivisionError: integer division or modulo by zero
# [Finished in 111ms]
#
# Divide By Zero error: https://i.imgur.com/NKtTIRh.png
# Extra rows /Uneven Columns: https://i.imgur.com/eHgkSh6.png
# Basic example working: https://i.imgur.com/g1mid5g.png
#
# ### SCRIPT (with added prints statements)
# print("len(frame_vals)", len(frame_vals))
# print("len(frames)", len(frames))
# print("chunksize", chunksize)
#
# NOTE: the diagnostic prints listed above were debugging aids for the
# ZeroDivisionError; the script below no longer emits them because the
# chunk-size computation they inspected has been replaced.

import csv
import re
from itertools import zip_longest
from typing import Iterable, List, Tuple

# Regex pattern for "# " followed by non-digits, " = ", then a number
# (integer or float), e.g. "# Frame_Number = 50".
_HEADER_RE = re.compile(r"# (\D+) = (\d+(?:\.\d+)?)")
# A data line is any line that starts with a number (integer or float).
_NUMBER_RE = re.compile(r"(\d+(?:\.\d+)?)")


def _parse_frames(lines: Iterable[str]) -> Tuple[List[str], List[List[str]]]:
    """Group data lines under the "# Frame_Number = N" header they follow.

    Returns a pair ``(headers, columns)`` where ``headers[i]`` is the label
    ``"Frame_N"`` and ``columns[i]`` holds the right-stripped data lines found
    between that header and the next one.
    """
    headers: List[str] = []
    columns: List[List[str]] = []
    for raw in lines:
        header = _HEADER_RE.match(raw)
        if header:
            # Only "Frame_Number" headers open a new column; any other
            # "# name = number" line is ignored.
            if header.group(1) == "Frame_Number":
                headers.append(f"Frame_{header.group(2)}")
                columns.append([])
            continue
        value = raw.rstrip()
        # Data lines that appear before the first frame header have no column
        # to belong to, so they are skipped.
        if value and columns and _NUMBER_RE.match(value):
            columns[-1].append(value)
    return headers, columns


def convert_csv(filenm: str, *, fillvalue: str = "") -> None:
    """Produce structured data by converting a frame dump to a CSV-style file.

    Reads ``filenm``, collects the numeric lines listed under each
    "# Frame_Number = N" header, and writes them to ``out.txt`` (in the
    current working directory) as space-delimited columns: one header row of
    ``Frame_N`` labels followed by one row per value index.

    Values are grouped by the header they follow rather than by dividing the
    total count evenly, so frames with extra rows stay in their own column;
    shorter columns are padded with ``fillvalue``.

    Args:
        filenm: Path of the text file to read.
        fillvalue: Cell content used where a frame has fewer values than the
            longest frame.

    Raises:
        ValueError: If the input contains no "# Frame_Number = N" header.
            ``out.txt`` is left untouched in that case.
    """
    with open(filenm, "r") as fin:
        headers, columns = _parse_frames(fin)

    if not headers:
        raise ValueError(
            f"no '# Frame_Number = <number>' header found in {filenm!r}"
        )

    # newline="" avoids blank lines between rows on Windows:
    # https://stackoverflow.com/questions/3348460/csv-file-written-with-python-has-blank-lines-between-each-row
    with open("out.txt", "w", newline="") as csvfile:
        csv_writer = csv.writer(
            csvfile, delimiter=" ", quotechar="|", quoting=csv.QUOTE_MINIMAL
        )
        csv_writer.writerow(headers)  # Write header
        # Transpose the per-frame columns into rows, padding uneven columns.
        csv_writer.writerows(zip_longest(*columns, fillvalue=fillvalue))


if __name__ == "__main__":
    convert_csv("teste.txt")
Editor is loading...