# --- Paste page header (not part of the script) ---
# Untitled
# unknown
# plain_text
# 8 days ago
# 3.0 kB
# 19
# Indexable
import json
import re
import os
def extract_failed_entries(log_file_path, json_file_path):
    """Print the "Saved Media" entries whose download URL failed per a log file.

    The log is scanned line by line. On every line containing "failed" or
    "Error" (case-sensitive), the URL that follows the phrase "for url: " is
    collected. The JSON file is then loaded and the items under its
    "Saved Media" key whose "Media Download Url" is exactly equal to one of
    the collected URLs are printed as a JSON document shaped like
    ``{"Saved Media": [...]}``.

    Args:
        log_file_path: Path of the text log file to scan.
        json_file_path: Path of the JSON file that holds a "Saved Media" list.

    Returns:
        None. The result and every error message are written to stdout; on
        any error a message is printed and the function returns early.
    """
    # --- Step 1: Extract Failed URLs from Log ---
    # Captures the URL that directly follows the phrase "for url: ".
    url_pattern = re.compile(r"for url: (https?://\S+)")

    if not os.path.exists(log_file_path):
        print(f"Error: Log file not found at: {log_file_path}")
        return

    print(f"Reading log file: {log_file_path}...")

    # A set collapses repeated failures (retries) of the same URL.
    failed_urls = set()
    try:
        # errors='replace' keeps stray non-UTF-8 bytes from aborting the scan.
        with open(log_file_path, 'r', encoding='utf-8', errors='replace') as f:
            for line in f:
                # Only lines that report a failure or error are of interest.
                if "failed" not in line and "Error" not in line:
                    continue
                match = url_pattern.search(line)
                if match:
                    failed_urls.add(match.group(1))
    except OSError as e:
        print(f"Error reading log file: {e}")
        return

    print(f"Found {len(failed_urls)} unique failed URLs.")

    # --- Step 2: Read and Filter JSON ---
    if not os.path.exists(json_file_path):
        print(f"Error: JSON file not found at: {json_file_path}")
        return

    try:
        with open(json_file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except json.JSONDecodeError:
        print("Error: Failed to decode the JSON file. Please check its format.")
        return
    except (OSError, UnicodeDecodeError) as e:
        # The path exists but cannot be read as UTF-8 text (e.g. it is a
        # directory, access is denied, or the bytes are not valid UTF-8).
        print(f"Error reading JSON file: {e}")
        return

    # The root must be an object that carries the expected key; a root such as
    # null or a number would otherwise fail on the membership test itself.
    if not isinstance(data, dict) or "Saved Media" not in data:
        print("Error: The JSON file does not contain the 'Saved Media' key.")
        return

    saved_media = data["Saved Media"]
    if not isinstance(saved_media, list):
        print("Error: The 'Saved Media' value in the JSON file is not a list.")
        return

    # Compare the URL found in the log against 'Media Download Url'.
    # (Per the original author's note: the logs contain '/dmd/mm?', which
    # matches 'Media Download Url', whereas 'Download Link' usually contains
    # '/dmd/memories?'.)
    filtered_items = []
    for item in saved_media:
        # Skip malformed entries instead of crashing on them.
        if not isinstance(item, dict):
            continue
        media_url = item.get("Media Download Url")
        if isinstance(media_url, str) and media_url in failed_urls:
            filtered_items.append(item)

    # --- Step 3: Build and Print Result ---
    result_json = {"Saved Media": filtered_items}
    print("\n--- Result JSON ---\n")
    print(json.dumps(result_json, indent=4))

    # Optional: Save to a file
    # with open('failed_downloads.json', 'w', encoding='utf-8') as f:
    #     json.dump(result_json, f, indent=4)
    # print("\n(Result also saved to failed_downloads.json)")
# --- CONFIGURATION ---
# Replace this with the actual path to your Windows log file.
# A raw string (r"...") avoids backslash-escape issues in Windows paths.
log_file = r"C:\path\to\your\logfile.log"
# JSON file to filter; it is expected to contain a "Saved Media" key.
json_file = "memories_history.json"

# Run the function only when executed as a script, not when imported.
if __name__ == "__main__":
    # Ensure you update the 'log_file' variable above before running.
    extract_failed_entries(log_file, json_file)
# Leave a Comment  (paste page footer, not part of the script)