# Untitled
#
# mail@pastecode.io avatar
# unknown
# plain_text
# 2 years ago
# 3.3 kB
# 17
# Indexable
import random
from collections import defaultdict

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

def load_numbers_from_file(filename):
    """Read integers from a text file that holds one number per line.

    Blank or whitespace-only lines (for example a trailing empty line at the
    end of the file) are skipped instead of raising ``ValueError``.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list of ints in the order they appear in the file.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a non-blank line is not a valid integer literal.
    """
    with open(filename, 'r') as f:
        stripped_lines = (line.strip() for line in f)
        # Empty strings are falsy, so blank lines are dropped here.
        return [int(text) for text in stripped_lines if text]

def count_selections_in_buckets(selections, bucket_size=600):
    """Tally how many of the selected numbers land in each bucket.

    A number's bucket index is ``number // bucket_size``.

    Args:
        selections: Iterable of numbers to bucket.
        bucket_size: Width of each bucket.

    Returns:
        A ``defaultdict(int)`` mapping bucket index to the number of
        selections that fell into it.
    """
    tally = defaultdict(int)
    bucket_indices = (value // bucket_size for value in selections)
    for index in bucket_indices:
        tally[index] = tally[index] + 1
    return tally

def run_simulation(filename, num_trials=1000, num_selections=89, bucket_size=600):
    """Repeatedly sample numbers from a file and bucket each sample.

    Args:
        filename: Path passed to ``load_numbers_from_file``.
        num_trials: How many independent samples to draw.
        num_selections: How many numbers ``random.sample`` draws (without
            replacement) in each trial.
        bucket_size: Bucket width passed to ``count_selections_in_buckets``.

    Returns:
        A list with one bucket-count mapping per trial, in trial order.
    """
    population = load_numbers_from_file(filename)

    def one_trial():
        # Draw a fresh sample and reduce it to per-bucket counts.
        draw = random.sample(population, num_selections)
        return count_selections_in_buckets(draw, bucket_size)

    return [one_trial() for _ in range(num_trials)]

def analyze_results(results, bucket_size=600, num_selections=None):
    """Summarise per-bucket counts across all simulation trials.

    A bucket that does not appear in a trial is counted as 0 for that trial,
    so every statistic is computed over the full set of trials rather than
    only over the trials in which the bucket happened to be hit.

    Args:
        results: Sequence of ``{bucket_index: count}`` mappings, one per
            trial.
        bucket_size: Unused; accepted for backward compatibility.
        num_selections: Number of selections per trial, used as the
            denominator of ``mean_percentage``. When ``None`` it is inferred
            as the average total count per trial.

    Returns:
        A dict mapping each bucket index to a dict with the keys
        ``mean_percentage``, ``std_dev``, ``range`` (a ``(min, max)`` tuple),
        ``p25``, ``p75`` and ``p90``. Empty when no bucket was ever hit.
    """
    results = list(results)

    # Collect every bucket seen in any trial, keeping first-seen order.
    buckets = {}
    for trial in results:
        for bucket in trial:
            buckets.setdefault(bucket, None)
    if not buckets:
        return {}

    if num_selections is None:
        total_selected = sum(sum(trial.values()) for trial in results)
        num_selections = total_selected / len(results)

    summary = {}
    for bucket in buckets:
        # Zero-fill trials that missed this bucket so all trials contribute.
        counts = [trial.get(bucket, 0) for trial in results]
        p25, p75, p90 = np.percentile(counts, [25, 75, 90])
        summary[bucket] = {
            'mean_percentage': np.mean(counts) / num_selections * 100,
            'std_dev': np.std(counts),
            'range': (min(counts), max(counts)),
            'p25': p25,
            'p75': p75,
            'p90': p90,
        }

    return summary


def results_to_dataframe(results, bucket_size=600):
    """Flatten per-trial bucket counts into a long-format DataFrame.

    Args:
        results: Sequence of ``{bucket_index: count}`` mappings, one per
            trial.
        bucket_size: Width of each bucket; used to build the ``"start-end"``
            label, whose inclusive end is ``start + bucket_size - 1``.

    Returns:
        A ``pandas.DataFrame`` with the columns ``trial``, ``bucket`` and
        ``count`` and one row per (trial, bucket) pair. The columns are
        present even when ``results`` is empty.
    """
    data = []
    for i, trial in enumerate(results):
        for bucket, count in trial.items():
            bucket_start = bucket * bucket_size
            # Derive the upper bound from bucket_size rather than assuming 600.
            bucket_end = bucket_start + bucket_size - 1
            data.append({
                'trial': i,
                'bucket': f"{bucket_start}-{bucket_end}",
                'count': count,
            })
    # Explicit columns keep the schema stable for empty input.
    return pd.DataFrame(data, columns=['trial', 'bucket', 'count'])


# Path of the input file; load_numbers_from_file() reads it as one integer
# per line. Replace this with your own numbers file.
# Original author's note: this is all the people who selected beeple as
# first choice.
filename = 'inputnumnum'
# Run the simulation with its default settings, then summarise and tabulate it.
# NOTE(review): results_to_dataframe is redefined further down and these three
# names are reassigned there, so this first pass looks redundant — confirm.
results = run_simulation(filename)
summary = analyze_results(results)
df = results_to_dataframe(results)

def results_to_dataframe(results, bucket_size=600):
    """Flatten per-trial bucket counts into a DataFrame sorted by bucket.

    Args:
        results: Sequence of ``{bucket_index: count}`` mappings, one per
            trial.
        bucket_size: Width of each bucket; used to compute ``bucket_start``
            and the ``"start-end"`` label, whose inclusive end is
            ``start + bucket_size - 1``.

    Returns:
        A ``pandas.DataFrame`` with the columns ``trial``, ``bucket``,
        ``count`` and ``bucket_start``, one row per (trial, bucket) pair,
        ordered by ``bucket_start``. The columns are present even when
        ``results`` is empty.
    """
    data = []
    for i, trial in enumerate(results):
        for bucket, count in trial.items():
            bucket_start = bucket * bucket_size
            # Derive the upper bound from bucket_size rather than assuming 600.
            bucket_end = bucket_start + bucket_size - 1
            data.append({
                'trial': i,
                'bucket': f"{bucket_start}-{bucket_end}",
                'count': count,
                'bucket_start': bucket_start,
            })
    # Explicit columns keep sort_values from raising KeyError on empty input.
    df = pd.DataFrame(
        data, columns=['trial', 'bucket', 'count', 'bucket_start']
    )
    # A stable sort keeps rows of the same bucket in trial order.
    df = df.sort_values('bucket_start', kind='mergesort')
    return df

# Run the pipeline again now that results_to_dataframe has been redefined
# above with the extra 'bucket_start' column and sorting. This draws a fresh
# random simulation and replaces the earlier results, summary and df.
results = run_simulation(filename)
summary = analyze_results(results)
df = results_to_dataframe(results)

# Per-bucket counts that are overlaid as red markers on the plot below.
# Keys use the same "start-end" label format as the 'bucket' column that
# results_to_dataframe builds with bucket_size=600.
# NOTE(review): presumably the actually observed counts — confirm. The values
# sum to 79, whereas run_simulation draws 89 numbers per trial by default.
specified_counts = {
    "0-599": 13,
    "600-1199": 18,
    "1200-1799": 8,
    "1800-2399": 10,
    "2400-2999": 11,
    "3000-3599": 13,
    "3600-4199": 4,
    "4200-4799": 2
}

# Draw one boxen plot per bucket showing the distribution of simulated counts.
sns.set(style="whitegrid")
plt.figure(figsize=(12, 6))
sns.boxenplot(x='bucket', y='count', data=df, palette="Set3")
# The title hard-codes 1000, which matches run_simulation's default num_trials.
plt.title('Boxen Plot of Raw Results for 1000 Simulations')
plt.xlabel('Bucket')
plt.ylabel('Count')
plt.xticks(rotation=45)

# Overlay specified_counts as red circle markers. linestyle='none' means no
# connecting line is drawn, so linewidth has no visible effect here.
# NOTE(review): this assumes the bucket labels and their order match the
# categories on the boxen plot's x axis — confirm.
x = list(specified_counts.keys())
y = list(specified_counts.values())
plt.plot(x, y, color='red', marker='o', linestyle='none', linewidth=2, markersize=6)

plt.show()