MergeSort Timing

import math
import time
import random
import matplotlib.pyplot as plt
import pandas as pd

basicOpsCounted = 0
def reset():
    global basicOpsCounted
    basicOpsCounted = 0
    pass

def basicOps():
    return basicOpsCounted
    pass

def mergeSort(a):
    global basicOpsCounted
    n = len(a)
    if n == 1:
        return a

    m = n // 2

    b = a[:m]
    c = a[m:n]

    mergeSort(b)
    mergeSort(c)

    merge(a,b,c)
    return a
pass

def merge(a, b, c):
    global basicOpsCounted
    n = len(a)
    m = len(b)
    i = j = k = 0
    while (j < m) and (k < (n - m)):
        basicOpsCounted += 1
        if (b[j] <= c[k]):
            a[i] = b[j]
            j += 1
        else:
            a[i] = c[k]
            k += 1
            
        i += 1
    if (j < m):
        a[i:n] = b[j:m]
    elif (k < (n - m)):
        a[i:n] = c[k:n]
pass

def generate_random_list(n):
    return [random.randint(1, 100000000) for _ in range(n)]

def run_mergesort_tests(k_values):
    sizes = []
    times = []
    operations = []
    nlogn_values = [] 
    estimated_times = []  

    for k in k_values:
        n = 2 ** k
        arr = generate_random_list(n)
        reset()
        start_time = time.time()
        mergeSort(arr)
        end_time = time.time()
        
        
        sizes.append(n)
        time_in_ms = (end_time - start_time) * 1000
        times.append(time_in_ms)
        operations.append(basicOps())
        nlogn_values.append(n * math.log2(n))  

        print(f"n = {n}: Time = {time_in_ms:.6f} ms, Basic Ops = {basicOps()}")

    # Calculate constant 'c' and estimated times
    estimated_times, c_op_estimate, c_time_estimate = calculate_constant_c(operations, nlogn_values, sizes, times)

    plt.figure(figsize=(18, 12))

    # Plot for execution time and basic operations
    plt.subplot(2, 2, 1)
    plt.plot(sizes, times, marker='o', label='Actual Time')
    plt.plot(sizes, estimated_times, marker='x', linestyle='--', label='Estimated Time (n log n)')
    plt.xlabel('Input Size (n)')
    plt.ylabel('Time (seconds)')
    plt.xscale('log')
    plt.yscale('log')
    plt.title('MergeSort Actual vs Estimated Time')
    plt.legend()

    # Plot for basic operations vs n, with n*log(n) for comparison
    plt.subplot(2, 2, 2)
    plt.plot(sizes, operations, marker='o', label='Basic Operations', color='r')  # Actual basic operations
    plt.plot(sizes, nlogn_values, marker='x', linestyle='--', label='Estimated n*log(n)', color='b')  # Estimated n*log(n)
    plt.xlabel('Input Size (n)')
    plt.ylabel('Basic Operations / n*log(n)')
    plt.xscale('log')
    plt.yscale('log')
    plt.title('Basic Operations vs Input Size (n) with n*log(n)')
    plt.legend()


    # Plot for n*log(n) vs basic operations
    plt.subplot(2, 2, 3)
    plt.plot(sizes, nlogn_values, marker='x', linestyle='--', label='n*log(n)', color='b')
    plt.plot(sizes, operations, marker='o', label='Basic Operations', color='r')
    plt.xlabel('Input Size (n)')
    plt.ylabel('Operations / n*log(n)')
    plt.xscale('log')
    plt.yscale('log')
    plt.title('n*log(n) vs Basic Operations')
    plt.legend()

    # Plot for correlation between nlogn_values and operations
    plt.subplot(2, 2, 4)
    plt.plot(sizes, nlogn_values, marker='x', linestyle='--', label='Estimated n*log(n)', color='b')  # Plot n*log(n)
    plt.plot(sizes, operations, marker='o', label='Basic Operations', color='g')  # Plot actual operations
    plt.xlabel('Input Size (n)')
    plt.ylabel('Operations / n*log(n)')
    plt.xscale('log')
    plt.yscale('log')
    plt.title('Correlation between n*log(n) and Basic Operations')
    plt.legend()

    plt.tight_layout()
    plt.show()


def calculate_constant_c(operations, nlogn_values, sizes, times):
    # Estimating constant c for operations
    c_op_values = [ops / nlogn for ops, nlogn in zip(operations, nlogn_values)]
    c_op_estimate = sum(c_op_values) / len(c_op_values)
    
    # Estimating constant c for time
    c_time_values = [time / nlogn for time, nlogn in zip(times, nlogn_values)]
    c_time_estimate = sum(c_time_values) / len(c_time_values)

    print(f"Estimated constant c (Operations) ≈ {c_op_estimate:.4f}")
    print(f"Estimated constant c (Time) ≈ {c_time_estimate:.8f}")

    # Calculate estimated times based on the estimated constant 'c_time'
    estimated_times = [c_time_estimate * nlogn for nlogn in nlogn_values]

    return estimated_times, c_op_estimate, c_time_estimate

k_values = [10, 15, 20]
run_mergesort_tests(k_values)
Editor is loading...