Untitled

 avatar
unknown
plain_text
2 months ago
2.0 kB
6
Indexable
import csv
import json
import os
from charset_normalizer import detect  # Install via pip: pip install charset-normalizer


def excel_column_to_index(column_name):
    """Convert Excel-style column letters (e.g., 'A', 'AB') to a zero-based index.

    The letters are read as a bijective base-26 number: 'A' -> 0, 'Z' -> 25,
    'AA' -> 26, 'AB' -> 27. Matching is case-insensitive and surrounding
    whitespace is ignored.

    Args:
        column_name: Column label made up only of the ASCII letters A-Z.

    Returns:
        The zero-based column index.

    Raises:
        ValueError: If the label is empty or contains anything other than
            the ASCII letters A-Z.
    """
    letters = column_name.strip().upper()
    if not letters:
        raise ValueError("Column name must not be empty")

    index = 0
    for char in letters:
        # Reject digits, punctuation and non-ASCII letters: they would
        # otherwise be folded into the arithmetic and silently yield the
        # index of an unrelated column (e.g. 'A1' used to map to column K).
        if not 'A' <= char <= 'Z':
            raise ValueError(f"Invalid Excel column name: {column_name!r}")
        index = index * 26 + (ord(char) - ord('A') + 1)
    return index - 1


def detect_encoding(file_path):
    """Return the encoding that ``detect`` reports for the given file.

    The whole file is read into memory as bytes and handed to ``detect``;
    the value stored under the result's ``'encoding'`` key is returned
    unchanged.
    """
    with open(file_path, 'rb') as binary_file:
        detection = detect(binary_file.read())
    return detection['encoding']


def process_csv(input_file):
    """Convert a CSV file to JSON, dropping the columns listed in config.json.

    ``config.json`` is read from the current working directory; its
    ``"columns_to_ignore"`` entry is iterated as Excel-style column letters
    (e.g. ``["A", "AB"]``). The first CSV row is treated as the header row
    and each following non-blank row becomes one JSON object keyed by the
    headers that were not ignored. The result is written to
    ``Remove_<input name without extension>.json`` in the current working
    directory, and the output name is printed.

    Args:
        input_file: Path to the CSV file to convert.

    Raises:
        KeyError: If config.json has no ``"columns_to_ignore"`` entry.
        OSError: If config.json, the input file, or the output file cannot
            be opened.
    """
    # 'utf-8-sig' reads plain UTF-8 and also tolerates a leading BOM.
    with open('config.json', 'r', encoding='utf-8-sig') as config_file:
        config = json.load(config_file)

    # Determine which column indexes to ignore.
    ignore_indexes = {
        excel_column_to_index(col) for col in config["columns_to_ignore"]
    }

    # Detection can come back empty; fall back to UTF-8 rather than letting
    # open() silently pick the platform's locale encoding.
    file_encoding = detect_encoding(input_file) or 'utf-8'

    # splitext swaps only the final extension, whatever its case, instead of
    # rewriting every '.csv' substring that happens to occur in the name.
    stem, _ = os.path.splitext(os.path.basename(input_file))
    output_file = f"Remove_{stem}.json"

    data = []
    # newline='' lets the csv module do its own line-ending handling, so
    # quoted fields containing line breaks are parsed correctly.
    with open(input_file, 'r', encoding=file_encoding, errors='replace',
              newline='') as csv_file:
        reader = csv.reader(csv_file)
        # An empty file has no header row; treat it as "no columns, no rows".
        headers = next(reader, [])

        # (index, header) pairs for the columns that are kept.
        # NOTE: duplicate header names collapse into a single key below,
        # with the right-most such column winning.
        kept_columns = [
            (index, header)
            for index, header in enumerate(headers)
            if index not in ignore_indexes
        ]

        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines; they carry no data.
                continue
            # Rows shorter than the header get None for the missing cells
            # instead of raising IndexError; extra trailing cells are ignored.
            data.append({
                header: row[index] if index < len(row) else None
                for index, header in kept_columns
            })

    # Write the processed data to a JSON file.
    with open(output_file, 'w', encoding='utf-8') as json_file:
        json.dump(data, json_file, indent=4)

    print(f"Processed file saved as: {output_file}")


if __name__ == "__main__":
    # Script entry point: prompt for a CSV path and convert that file.
    process_csv(input("Enter the path to the CSV file: "))

Leave a Comment