Untitled

 avatar
unknown
plain_text
6 months ago
1.1 kB
5
Indexable
import os
import re

# Directory to scan
directory_to_scan = 'path_to_your_directory'

# Regex pattern to match "id: xxx" (xxx is one or more digits)
pattern = re.compile(r'id: (\d+)')

# Single output file that receives all unique IDs
output_file_path = 'output_directory/all_ids.txt'  # Modify the output file path as needed


def collect_unique_ids(directory, extension='.txt'):
    """Return the set of ID strings found in files under *directory*.

    Walks *directory* recursively and scans every file whose name ends with
    *extension* (modify the extension if needed). Every ``id: <digits>``
    occurrence is collected, including several on the same line.

    Note: ``os.walk`` ignores listing errors by default, so a missing or
    unreadable *directory* yields an empty set rather than an exception.

    Args:
        directory: Root directory to scan.
        extension: File-name suffix a file must have to be scanned.

    Returns:
        A set of the captured digit strings (kept as strings so leading
        zeros are preserved).
    """
    found = set()
    for root, _dirs, files in os.walk(directory):
        for name in files:
            if not name.endswith(extension):
                continue
            file_path = os.path.join(root, name)
            # errors='replace': a stray non-UTF-8 byte must not abort the
            # whole scan. ASCII bytes always decode as themselves, so ASCII
            # matches such as "id: 123" are unaffected by the replacement.
            with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
                for line in f:
                    # findall (not search) so that IDs after the first one
                    # on a line are not silently dropped.
                    found.update(pattern.findall(line))
    return found


def write_ids(ids, path):
    """Write *ids* to *path* as quoted values, one per line, comma-separated.

    The IDs are ordered numerically (``"9"`` before ``"10"``); the string
    itself breaks ties so that e.g. ``"7"`` and ``"007"`` have a stable
    order. The parent directory of *path* is created when it does not exist.

    Args:
        ids: Iterable of digit strings.
        path: Destination file; it is overwritten if it already exists.
    """
    out_dir = os.path.dirname(path)
    # A bare file name has an empty dirname, and os.makedirs('') raises.
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    ordered = sorted(ids, key=lambda value: (int(value), value))
    with open(path, 'w', encoding='utf-8') as f:
        f.write(",\n".join(f'"{id_value}"' for id_value in ordered))


def main():
    """Scan the configured directory and write all unique IDs to one file."""
    unique_ids = collect_unique_ids(directory_to_scan)
    write_ids(unique_ids, output_file_path)
    print("Unique IDs written to a single file successfully!")


if __name__ == '__main__':
    main()
Editor is loading...
Leave a Comment