Untitled
unknown
plain_text
a year ago
2.0 kB
9
Indexable
import csv
import json
import os
from charset_normalizer import detect # Install via pip: pip install charset-normalizer
def excel_column_to_index(column_name):
    """Convert Excel-style column letters (e.g., 'A', 'AB') to a zero-based index.

    The letters are read as a bijective base-26 number: 'A' -> 0, 'Z' -> 25,
    'AA' -> 26, 'AB' -> 27, and so on. Matching is case-insensitive and
    surrounding whitespace is ignored.

    Args:
        column_name: Column label made up only of the ASCII letters A-Z.

    Returns:
        The zero-based column index.

    Raises:
        ValueError: If the label is empty or contains anything other than
            ASCII letters (for example 'A1' or '$B').
    """
    letters = column_name.strip()
    # Validate before upper-casing: str.upper() can expand some non-ASCII
    # characters into plain letters (e.g. 'ß' -> 'SS'), which would otherwise
    # slip past the check and yield a bogus index.
    if not letters or not all(
        'a' <= ch <= 'z' or 'A' <= ch <= 'Z' for ch in letters
    ):
        raise ValueError(f"Invalid Excel column name: {column_name!r}")

    index = 0
    for char in letters.upper():
        # Each position is worth 1..26 (there is no zero digit), hence the +1
        # here and the single -1 on the final result.
        index = index * 26 + (ord(char) - ord('A') + 1)
    return index - 1
def detect_encoding(file_path):
    """Guess the text encoding of the file at ``file_path``.

    The whole file is read in binary mode and passed to ``detect``; the value
    stored under the ``'encoding'`` key of its result is returned unchanged.
    """
    with open(file_path, 'rb') as raw_file:
        guess = detect(raw_file.read())
    return guess['encoding']
def process_csv(input_file):
    """Convert a CSV file to JSON, dropping the columns listed in config.json.

    Reads ``columns_to_ignore`` (Excel-style column letters such as "A" or
    "AB") from ``config.json`` in the current working directory, detects the
    input file's encoding, and writes the remaining columns of every data row
    as a list of ``{header: value}`` objects to ``Remove_<name>.json`` in the
    current working directory, where ``<name>`` is the input file name without
    its extension.

    Args:
        input_file: Path to the CSV file. Its first row is used as headers.

    Notes:
        * Rows shorter than the header row get ``None`` (JSON ``null``) for
          the missing cells instead of raising ``IndexError``; completely
          empty rows are skipped.
        * An empty input file produces an empty JSON list.
        * If two kept columns share a header, the later column's value wins.
    """
    with open('config.json', 'r', encoding='utf-8') as config_file:
        config = json.load(config_file)
    columns_to_ignore = config["columns_to_ignore"]
    # Determine which column indexes to ignore.
    ignore_indexes = {excel_column_to_index(col) for col in columns_to_ignore}

    file_encoding = detect_encoding(input_file)

    # splitext swaps only the real extension, so names such as "a.csv.b.csv"
    # or "DATA.CSV" still produce a single, trailing ".json".
    stem, _ = os.path.splitext(os.path.basename(input_file))
    output_file = f"Remove_{stem}.json"

    # newline='' lets the csv module do its own newline handling, which is
    # required for quoted fields that contain line breaks.
    with open(input_file, 'r', encoding=file_encoding, errors='replace',
              newline='') as csv_file:
        reader = csv.reader(csv_file)
        # Default to no headers so an empty file does not raise StopIteration.
        headers = next(reader, [])
        # (column index, header) pairs for the columns that are kept.
        kept_columns = [
            (i, header) for i, header in enumerate(headers)
            if i not in ignore_indexes
        ]
        data = []
        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines; they carry no data.
                continue
            data.append({
                header: (row[i] if i < len(row) else None)
                for i, header in kept_columns
            })

    # Write the processed data to a JSON file.
    with open(output_file, 'w', encoding='utf-8') as json_file:
        json.dump(data, json_file, indent=4)
    print(f"Processed file saved as: {output_file}")
if __name__ == "__main__":
    # Script entry point: prompt for the CSV path and convert that file.
    process_csv(input("Enter the path to the CSV file: "))
Editor is loading...
Leave a Comment