# Paste metadata: Untitled | unknown | plain_text | 10 months ago | 3.2 kB | 7 | Indexable
import json
import gzip
import os
import pandas as pd
# Source IP addresses whose records should be kept when filtering the logs
target_ips = [
    '167.71.174.44',
    '162.33.179.99',
]
def _as_dict(value):
    """Return *value* when it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def _build_row(record, src_ip):
    """Flatten one matching record into the flat output row."""
    user_agent = record.get('userAgent', 'N/A')
    # If userAgent is a list, keep only its first element.
    if isinstance(user_agent, list):
        user_agent = user_agent[0] if user_agent else 'N/A'

    # A key can be present with an explicit JSON null; a .get() default does
    # not cover that case, so normalise anything that is not a dict.
    user_identity = _as_dict(record.get('userIdentity'))
    request_params = _as_dict(record.get('requestParameters'))

    return {
        'sourceIPAddress': src_ip,
        'eventTime': record.get('eventTime', 'N/A'),
        'userAgent': user_agent,
        'eventName': record.get('eventName', 'N/A'),
        'userIdentity.userName': user_identity.get('userName', 'N/A'),
        'bucketName': request_params.get('bucketName', 'N/A'),
        'Host': request_params.get('Host', 'N/A'),
    }


def process_file(file_path, ips=None):
    """
    Process a single .json.gz file and return a list of dictionaries
    for records that match the target source IP addresses.

    Args:
        file_path: Path of the gzip-compressed JSON file to read.
        ips: Optional iterable of source IP strings to keep. When omitted,
            the module-level ``target_ips`` list is used.

    Returns:
        A list with one flat dict per matching record, with the keys
        ``sourceIPAddress``, ``eventTime``, ``userAgent``, ``eventName``,
        ``userIdentity.userName``, ``bucketName`` and ``Host``. Fields that
        are absent are reported as ``'N/A'``. The list is empty when the
        file cannot be read or parsed (the error is printed), or when it
        holds no matching record.
    """
    wanted = set(target_ips if ips is None else ips)

    try:
        with gzip.open(file_path, 'rt', encoding='utf-8') as f:
            data = json.load(f)
    except Exception as e:
        # Best-effort per file: report the unreadable/corrupt file and move on.
        print(f"Error processing file {file_path}: {e}")
        return []

    entries = data.get('Records') if isinstance(data, dict) else None
    if not isinstance(entries, list):
        return []

    records = []
    for record in entries:
        # Skip malformed entries instead of abandoning the rest of the file.
        if not isinstance(record, dict):
            continue
        src_ip = record.get('sourceIPAddress')
        if not isinstance(src_ip, str):
            continue
        # Strip stray whitespace before comparing against the wanted IPs.
        src_ip = src_ip.strip()
        if src_ip in wanted:
            records.append(_build_row(record, src_ip))
    return records
def _iter_log_files(root_dir):
    """Yield the path of every ``.json.gz`` file under *root_dir*, recursively."""
    for root, _dirs, files in os.walk(root_dir):
        for file_name in files:
            if file_name.endswith('.json.gz'):
                yield os.path.join(root, file_name)


def main(root_dir='.', max_files=10, output_csv='filtered_cloudtrail_data.csv'):
    """
    Filter the .json.gz files under a directory tree and save matches to CSV.

    Each file found is passed to ``process_file``; the records it returns are
    collected and, if there are any, written to *output_csv* through a pandas
    DataFrame. A one-line summary is printed either way.

    Args:
        root_dir: Directory to walk recursively. Defaults to the current
            directory.
        max_files: Maximum number of .json.gz files to process, in the order
            ``os.walk`` yields them. ``None`` removes the limit.
        output_csv: Path of the CSV file written when records are found.
    """
    # Local import: itertools is not among this script's top-level imports.
    from itertools import islice

    all_records = []
    processed_count = 0
    # islice stops the walk as soon as the file limit is reached.
    for file_path in islice(_iter_log_files(root_dir), max_files):
        all_records.extend(process_file(file_path))
        processed_count += 1

    if not all_records:
        print("No matching records found.")
        return

    df = pd.DataFrame(all_records)
    df.to_csv(output_csv, index=False)
    print(f"Saved {len(all_records)} records from {processed_count} files to {output_csv}")


if __name__ == "__main__":
    main()
# Paste-site page residue: "Editor is loading..." | "Leave a Comment"