Untitled
unknown
plain_text
2 months ago
3.2 kB
4
Indexable
"""Filter gzip-compressed CloudTrail JSON logs by source IP and export to CSV."""
import gzip
import itertools
import json
import os

import pandas as pd

# IP addresses to filter by
target_ips = ['167.71.174.44', '162.33.179.99']


def _as_dict(value):
    """Return *value* if it is a dict, otherwise an empty dict.

    ``dict.get(key, {})`` only falls back to ``{}`` when the key is absent;
    a key that is present with JSON ``null`` yields ``None``, so nested
    lookups must be guarded explicitly.
    """
    return value if isinstance(value, dict) else {}


def _extract_record(record):
    """Return the flattened row for *record*, or ``None`` if it does not match.

    A record matches when its ``sourceIPAddress`` (after stripping
    surrounding whitespace) is one of ``target_ips``. Records that are not
    dicts, or whose ``sourceIPAddress`` is not a string, never match.
    """
    if not isinstance(record, dict):
        return None

    src_ip = record.get('sourceIPAddress', '')
    if not isinstance(src_ip, str):
        return None
    src_ip = src_ip.strip()
    if src_ip not in target_ips:
        return None

    # userAgent may be a list; in that case keep only its first element.
    user_agent = record.get('userAgent', 'N/A')
    if isinstance(user_agent, list):
        user_agent = user_agent[0] if user_agent else 'N/A'

    # Both nested objects may be missing or null; treat either as empty.
    user_identity = _as_dict(record.get('userIdentity'))
    request_params = _as_dict(record.get('requestParameters'))

    return {
        'sourceIPAddress': src_ip,
        'eventTime': record.get('eventTime', 'N/A'),
        'userAgent': user_agent,
        'eventName': record.get('eventName', 'N/A'),
        'userIdentity.userName': user_identity.get('userName', 'N/A'),
        'bucketName': request_params.get('bucketName', 'N/A'),
        'Host': request_params.get('Host', 'N/A'),
    }


def process_file(file_path):
    """Process a single .json.gz file and return its matching records.

    Args:
        file_path: Path to a gzip-compressed JSON document that is expected
            to hold a top-level ``Records`` list.

    Returns:
        A list of dictionaries, one per record whose source IP address is
        in ``target_ips``. The list is empty when the file cannot be read
        or parsed (an error message is printed), or when it has no
        ``Records`` list.
    """
    records = []

    # Only loading and parsing is guarded: an unreadable or corrupt file is
    # reported and skipped so that one bad file does not stop the whole run.
    try:
        with gzip.open(file_path, 'rt', encoding='utf-8') as f:
            data = json.load(f)
    except Exception as e:
        print(f"Error processing file {file_path}: {e}")
        return records

    entries = data.get('Records') if isinstance(data, dict) else None
    if not isinstance(entries, list):
        return records

    for record in entries:
        row = _extract_record(record)
        if row is not None:
            records.append(row)
    return records


def _iter_log_files(root_dir):
    """Yield paths of all ``.json.gz`` files under *root_dir*, recursively.

    Directories and file names are visited in sorted order so that the
    sequence of paths is reproducible between runs.
    """
    for root, dirs, files in os.walk(root_dir):
        dirs.sort()  # in-place sort steers os.walk's descent order
        for file in sorted(files):
            if file.endswith('.json.gz'):
                yield os.path.join(root, file)


def main(max_files=10):
    """Scan the current directory tree and write matching records to a CSV.

    Args:
        max_files: Maximum number of ``.json.gz`` files to process;
            ``None`` removes the limit.

    Matching records are written to ``filtered_cloudtrail_data.csv`` in the
    current directory; nothing is written when no record matches.

    NOTE(review): the original source lost its indentation, so it is unclear
    whether the counter was meant to count every processed file or only
    files that produced matches. This follows the original "first 10 files"
    comment and counts every processed file - confirm.
    """
    all_records = []
    processed_count = 0

    for file_path in itertools.islice(_iter_log_files('.'), max_files):
        all_records.extend(process_file(file_path))
        processed_count += 1

    if all_records:
        df = pd.DataFrame(all_records)
        output_csv = 'filtered_cloudtrail_data.csv'
        df.to_csv(output_csv, index=False)
        print(f"Saved {len(all_records)} records from {processed_count} files to {output_csv}")
    else:
        print("No matching records found.")


if __name__ == "__main__":
    main()
Editor is loading...
Leave a Comment