Untitled

 avatar
unknown
plain_text
8 days ago
3.0 kB
19
Indexable
import json
import re
import os

def extract_failed_entries(log_file_path, json_file_path):
    """
    Parse a log file for failed download URLs and filter a JSON export
    (with a "Saved Media" root key) down to the entries whose
    'Media Download Url' matches one of those failed URLs.

    Prints progress and the filtered JSON to stdout.

    Returns:
        dict: {"Saved Media": [matching items]} on success, so callers can
            save or post-process the result programmatically.
        None: when either file is missing/unreadable or the JSON is invalid
            (backward-compatible with the previous always-None return).
    """
    # --- Step 1: Extract Failed URLs from Log ---
    failed_urls = _collect_failed_urls(log_file_path)
    if failed_urls is None:
        return None

    print(f"Found {len(failed_urls)} unique failed URLs.")

    # --- Step 2: Read and Filter JSON ---
    data = _load_saved_media(json_file_path)
    if data is None:
        return None

    # Compare the URL found in the log against the 'Media Download Url' in
    # the JSON. (The logs contain '/dmd/mm?' which matches
    # 'Media Download Url', whereas 'Download Link' usually contains
    # '/dmd/memories?')
    filtered_items = [
        item for item in data["Saved Media"]
        if item.get("Media Download Url") in failed_urls
    ]

    # --- Step 3: Build and Print Result ---
    result_json = {"Saved Media": filtered_items}

    print("\n--- Result JSON ---\n")
    print(json.dumps(result_json, indent=4))

    return result_json


def _collect_failed_urls(log_file_path):
    """Return the set of unique URLs found on failure/error log lines, or None on I/O error."""
    # Regex to find the URL specifically after the phrase "for url: "
    url_pattern = re.compile(r"for url: (https?://\S+)")

    if not os.path.exists(log_file_path):
        print(f"Error: Log file not found at: {log_file_path}")
        return None

    print(f"Reading log file: {log_file_path}...")
    failed_urls = set()
    try:
        with open(log_file_path, 'r', encoding='utf-8', errors='replace') as f:
            for line in f:
                # Only consider lines indicating a failed or errored attempt.
                if "failed" in line or "Error" in line:
                    match = url_pattern.search(line)
                    if match:
                        # A set de-duplicates retries automatically.
                        failed_urls.add(match.group(1))
    except OSError as e:
        # Narrowed from a blanket Exception: only I/O failures are expected
        # here (decode errors are suppressed by errors='replace').
        print(f"Error reading log file: {e}")
        return None

    return failed_urls


def _load_saved_media(json_file_path):
    """Load the JSON export and validate the 'Saved Media' root key; return None on any failure."""
    if not os.path.exists(json_file_path):
        print(f"Error: JSON file not found at: {json_file_path}")
        return None

    try:
        with open(json_file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except json.JSONDecodeError:
        print("Error: Failed to decode the JSON file. Please check its format.")
        return None

    # Check if the expected root key exists before filtering.
    if "Saved Media" not in data:
        print("Error: The JSON file does not contain the 'Saved Media' key.")
        return None

    return data

# --- CONFIGURATION ---
# Replace this with the actual path to your windows log file
# Usage of raw string (r"...") helps avoid issues with backslashes in Windows paths
log_file = r"C:\path\to\your\logfile.log" 
# NOTE(review): relative path — resolved against the current working
# directory at run time, not against this script's location.
json_file = "memories_history.json"

# Run the function
if __name__ == "__main__":
    # Ensure you update 'log_file' variable above before running
    extract_failed_entries(log_file, json_file)
Editor is loading...
Leave a Comment