Untitled
import csv
import json
import os


def excel_column_to_index(column_name):
    """Convert Excel-style column letters (e.g., 'A', 'AB') to a zero-based index.

    The conversion is case-insensitive and ignores surrounding whitespace:
    'A' -> 0, 'Z' -> 25, 'AA' -> 26, 'AB' -> 27.

    Args:
        column_name: Column label made only of the ASCII letters A-Z / a-z.

    Returns:
        The zero-based column index.

    Raises:
        ValueError: If the label is empty or contains anything other than
            ASCII letters. Without this check such input would silently
            produce a meaningless (possibly negative) index.
    """
    letters = column_name.strip()
    is_ascii_letters = bool(letters) and all(
        'A' <= ch <= 'Z' or 'a' <= ch <= 'z' for ch in letters
    )
    if not is_ascii_letters:
        raise ValueError(f"Invalid Excel column name: {column_name!r}")

    # Bijective base-26: 'A' is digit 1 rather than 0, hence the final -1.
    index = 0
    for char in letters.upper():
        index = index * 26 + (ord(char) - ord('A') + 1)
    return index - 1


def detect_encoding(file_path):
    """Detect the encoding of the given file.

    The whole file is read as bytes and handed to charset-normalizer.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        The value of the detector's 'encoding' entry. This may be None when
        the detector cannot decide, so callers should be ready to fall back.
    """
    # Imported here rather than at module level so the third-party package is
    # only required when auto-detection is actually used.
    from charset_normalizer import detect  # Install via pip: pip install charset-normalizer

    with open(file_path, 'rb') as f:
        raw_data = f.read()
    result = detect(raw_data)
    return result['encoding']


def process_csv(input_file, config_path='config.json', encoding=None):
    """Convert a CSV file to JSON, dropping the columns listed in the config.

    The config is a JSON object whose "columns_to_ignore" entry is a list of
    Excel-style column letters. Every remaining column becomes a key (taken
    from the header row) in one JSON object per data row. The result is
    written to "Remove_<input name without extension>.json" in the current
    working directory, and the output name is printed.

    Args:
        input_file: Path to the CSV file to convert.
        config_path: Path to the JSON config file. Defaults to 'config.json'
            in the current working directory.
        encoding: Text encoding of the CSV. When omitted, it is auto-detected
            with detect_encoding(), falling back to UTF-8 if detection
            yields nothing.

    Raises:
        KeyError: If the config has no "columns_to_ignore" entry.
        ValueError: If a configured column label is not a valid column name.
        OSError: If the config, input, or output file cannot be opened.
    """
    with open(config_path, 'r', encoding='utf-8') as config_file:
        config = json.load(config_file)
    columns_to_ignore = config["columns_to_ignore"]

    # Determine which column indexes to ignore
    ignore_indexes = {excel_column_to_index(col) for col in columns_to_ignore}

    file_encoding = encoding or detect_encoding(input_file) or 'utf-8'

    # splitext swaps only the real extension; a plain str.replace would also
    # rewrite '.csv' in the middle of a name and miss variants such as '.CSV'.
    stem, _ext = os.path.splitext(os.path.basename(input_file))
    output_file = f"Remove_{stem}.json"

    data = []
    # newline='' lets the csv module handle line endings itself, which is
    # required for quoted fields that contain embedded newlines.
    with open(input_file, 'r', encoding=file_encoding, errors='replace',
              newline='') as csv_file:
        reader = csv.reader(csv_file)
        headers = next(reader, None)  # None for a completely empty file

        if headers is not None:
            # (source index, header) pairs for the columns that are kept
            kept_columns = [
                (i, header) for i, header in enumerate(headers)
                if i not in ignore_indexes
            ]
            for row in reader:
                if not row:
                    # csv.reader yields [] for blank lines; they carry no data
                    continue
                # Rows shorter than the header are padded with empty strings
                # instead of raising IndexError.
                data.append({
                    header: (row[i] if i < len(row) else "")
                    for i, header in kept_columns
                })

    # Write the processed data to a JSON file
    with open(output_file, 'w', encoding='utf-8') as json_file:
        json.dump(data, json_file, indent=4)

    print(f"Processed file saved as: {output_file}")


if __name__ == "__main__":
    input_csv = input("Enter the path to the CSV file: ")
    process_csv(input_csv)
Leave a Comment