# Paste-site header: Untitled | unknown | plain_text | 10 months ago | 3.4 kB | 7 | Indexable
import json
import gzip
import os
import pandas as pd
# tqdm is an optional dependency used only for a progress bar. When it is
# not installed the name is bound to None so callers can test `if tqdm:` and
# fall back to printing progress manually.
try:
    from tqdm import tqdm
except ImportError:
    tqdm = None
# IP addresses to filter by
target_ips = ['167.71.174.44', '162.33.179.99']


def _as_dict(value):
    """Return ``value`` when it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def process_file(file_path, ips=None):
    """
    Process a single .json.gz file and return a list of dictionaries
    for records that match the target source IP addresses.

    Args:
        file_path: Path to a gzip-compressed JSON document. Records are read
            from the list stored under its top-level ``Records`` key.
        ips: Optional iterable of source IP strings to match. When omitted,
            the module-level ``target_ips`` list is used.

    Returns:
        One dict per matching record with the keys ``sourceIPAddress``,
        ``eventTime``, ``userAgent``, ``eventName``,
        ``userIdentity.userName``, ``bucketName`` and ``Host``. Fields that
        are absent from the record are reported as ``'N/A'``. An empty list
        is returned when the file cannot be read or parsed (the error is
        printed, not raised) or when nothing matches.
    """
    wanted = set(target_ips if ips is None else ips)
    records = []

    # Best-effort per file: an unreadable or corrupt archive is reported and
    # skipped so that one bad file does not abort a whole batch.
    try:
        with gzip.open(file_path, 'rt', encoding='utf-8') as f:
            data = json.load(f)
    except Exception as e:
        print(f"Error processing file {file_path}: {e}")
        return records

    entries = data.get('Records') if isinstance(data, dict) else None
    if not isinstance(entries, list):
        return records

    for record in entries:
        if not isinstance(record, dict):
            continue

        # A missing or JSON-null source address cannot match any target;
        # skip just this record instead of failing on .strip().
        raw_ip = record.get('sourceIPAddress')
        if not isinstance(raw_ip, str):
            continue
        src_ip = raw_ip.strip()
        if src_ip not in wanted:
            continue

        # Handle userAgent (if it's a list, take the first element)
        user_agent = record.get('userAgent', 'N/A')
        if isinstance(user_agent, list):
            user_agent = user_agent[0] if user_agent else 'N/A'

        # dict.get(key, {}) only applies its default when the key is absent;
        # a key present with JSON null yields None, so normalise both nested
        # objects before reading from them.
        user_identity = _as_dict(record.get('userIdentity'))
        request_params = _as_dict(record.get('requestParameters'))

        records.append({
            'sourceIPAddress': src_ip,
            'eventTime': record.get('eventTime', 'N/A'),
            'userAgent': user_agent,
            'eventName': record.get('eventName', 'N/A'),
            'userIdentity.userName': user_identity.get('userName', 'N/A'),
            'bucketName': request_params.get('bucketName', 'N/A'),
            'Host': request_params.get('Host', 'N/A'),
        })

    return records
def main(root_dir='.', output_csv='filtered_cloudtrail_data.csv'):
    """
    Filter every .json.gz file under ``root_dir`` and write the matches to CSV.

    Args:
        root_dir: Directory that is walked recursively for files whose name
            ends in ``.json.gz``. Defaults to the current directory.
        output_csv: Path of the CSV file to write. Defaults to
            ``filtered_cloudtrail_data.csv``. The file is only written when
            at least one record matched.
    """
    # Walk through the directory recursively to collect all .json.gz file
    # paths up front, so the total is known for progress reporting.
    file_paths = []
    for root, _dirs, files in os.walk(root_dir):
        for file in files:
            if file.endswith('.json.gz'):
                file_paths.append(os.path.join(root, file))
    # os.walk yields entries in filesystem order; sorting keeps the row order
    # of the output reproducible between runs and machines.
    file_paths.sort()

    total_files = len(file_paths)
    print(f"Found {total_files} .json.gz files to process.")

    # Use tqdm if available; otherwise, iterate normally and print a counter.
    if tqdm:
        iterator = tqdm(file_paths, total=total_files, desc="Processing files")
    else:
        iterator = file_paths

    all_records = []
    for idx, file_path in enumerate(iterator, start=1):
        all_records.extend(process_file(file_path))
        if not tqdm:
            print(f"Processed {idx}/{total_files} files.")

    if not all_records:
        print("No matching records found.")
        return

    df = pd.DataFrame(all_records)
    df.to_csv(output_csv, index=False)
    print(f"Saved {len(all_records)} records to {output_csv}")


if __name__ == "__main__":
    main()
# Paste-site footer: "Editor is loading..." | "Leave a Comment"