import random
from collections import defaultdict

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
def load_numbers_from_file(filename):
    """Read one integer per line from a text file.

    Blank or whitespace-only lines are skipped, so stray empty lines (for
    example at the end of the file) no longer abort the load.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list with the integers in file order.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a non-blank line is not a valid integer.
    """
    with open(filename, 'r') as f:
        # int() tolerates surrounding whitespace but not an empty string,
        # hence the explicit blank-line filter.
        return [int(line) for line in f if line.strip()]
def count_selections_in_buckets(selections, bucket_size=600):
    """Tally how many selected numbers fall into each fixed-width bucket.

    A number ``n`` belongs to bucket index ``n // bucket_size``.

    Args:
        selections: Iterable of integers to tally.
        bucket_size: Width of each bucket.

    Returns:
        A ``defaultdict(int)`` mapping bucket index to the number of
        selections in that bucket; buckets appear in first-seen order.
    """
    tally = defaultdict(int)
    bucket_indices = (value // bucket_size for value in selections)
    for bucket_index in bucket_indices:
        tally[bucket_index] += 1
    return tally
def run_simulation(filename, num_trials=1000, num_selections=89, bucket_size=600, seed=None):
    """Repeatedly draw random selections and bucket each draw.

    Every trial samples ``num_selections`` numbers without replacement from
    the numbers stored in ``filename`` and counts how many land in each
    bucket of width ``bucket_size``.

    Args:
        filename: Path of the file read by ``load_numbers_from_file``.
        num_trials: Number of independent trials to run.
        num_selections: How many numbers are drawn in each trial.
        bucket_size: Bucket width passed to ``count_selections_in_buckets``.
        seed: Optional seed. When given, a private ``random.Random(seed)``
            drives the sampling so the run is reproducible; when ``None``
            the module-level ``random`` generator is used, as before.

    Returns:
        A list with one bucket-count mapping per trial.

    Raises:
        ValueError: From ``random.sample`` when ``num_selections`` is
            larger than the number of values in the file (or negative).
    """
    all_numbers = load_numbers_from_file(filename)
    # A private generator keeps seeded runs reproducible without disturbing
    # the global random state other code may rely on.
    rng = random if seed is None else random.Random(seed)
    return [
        count_selections_in_buckets(
            rng.sample(all_numbers, num_selections), bucket_size
        )
        for _ in range(num_trials)
    ]
def analyze_results(results, bucket_size=600, num_selections=89):
    """Summarise the per-bucket selection counts across all trials.

    Every bucket seen in at least one trial is evaluated over *all* trials;
    a trial that put no selection into a bucket contributes a count of 0.

    Args:
        results: Sequence of per-trial mappings ``{bucket: count}``.
        bucket_size: Unused; kept so existing callers keep working.
        num_selections: Number of selections drawn per trial, used as the
            denominator of ``mean_percentage``.

    Returns:
        A dict mapping each bucket to a dict with the keys
        ``mean_percentage`` (mean count as a percentage of
        ``num_selections``), ``std_dev``, ``range`` (``(min, max)``),
        ``p25``, ``p75`` and ``p90``. Buckets appear in first-seen order.
        Empty ``results`` yield an empty dict.
    """
    # dict.fromkeys de-duplicates while keeping first-seen order.
    buckets = list(
        dict.fromkeys(bucket for trial in results for bucket in trial)
    )

    summary = {}
    for bucket in buckets:
        # Include the zero-count trials: leaving them out would bias the
        # mean, the minimum and the percentiles upwards. ``get`` avoids
        # inserting keys into defaultdict trials.
        counts = [trial.get(bucket, 0) for trial in results]
        p25, p75, p90 = np.percentile(counts, [25, 75, 90])
        summary[bucket] = {
            'mean_percentage': np.mean(counts) / num_selections * 100,
            'std_dev': np.std(counts),
            'range': (min(counts), max(counts)),
            'p25': p25,
            'p75': p75,
            'p90': p90,
        }
    return summary
def results_to_dataframe(results, bucket_size=600):
    """Flatten per-trial bucket counts into a long-format DataFrame.

    Args:
        results: Sequence of per-trial mappings ``{bucket: count}``.
        bucket_size: Bucket width, used to build the ``"start-end"`` label
            of each bucket (``end`` is ``start + bucket_size - 1``).

    Returns:
        A ``pandas.DataFrame`` with the columns ``trial`` (index of the
        trial in ``results``), ``bucket`` (label) and ``count``, one row
        per (trial, bucket) pair. The columns are present even when
        ``results`` is empty.
    """
    columns = ['trial', 'bucket', 'count']
    data = []
    for i, trial in enumerate(results):
        for bucket, count in trial.items():
            bucket_start = bucket * bucket_size
            # The upper bound follows bucket_size instead of a fixed 599,
            # so labels stay correct for non-default widths.
            bucket_end = bucket_start + bucket_size - 1
            data.append({
                'trial': i,
                'bucket': f"{bucket_start}-{bucket_end}",
                'count': count,
            })
    return pd.DataFrame(data, columns=columns)
# Path of the numbers file (one integer per line); replace it with your own.
# It holds the numbers of all the people who selected beeple as first choice.
filename = 'inputnumnum'
# The simulation is run once, further down in this script. Running it here
# as well only produced results, summary and df values that were overwritten
# before anything read them, so the redundant run has been removed.
def results_to_dataframe(results, bucket_size=600):
    """Flatten per-trial bucket counts into a DataFrame sorted by bucket.

    Args:
        results: Sequence of per-trial mappings ``{bucket: count}``.
        bucket_size: Bucket width, used to compute each bucket's start and
            its ``"start-end"`` label (``end`` is
            ``start + bucket_size - 1``).

    Returns:
        A ``pandas.DataFrame`` with the columns ``trial``, ``bucket``,
        ``count`` and ``bucket_start``, one row per (trial, bucket) pair,
        ordered by ``bucket_start``. Rows of the same bucket keep their
        trial order. The original row index labels are preserved. Empty
        ``results`` yield an empty frame that still has all four columns.
    """
    columns = ['trial', 'bucket', 'count', 'bucket_start']
    data = []
    for i, trial in enumerate(results):
        for bucket, count in trial.items():
            bucket_start = bucket * bucket_size
            data.append({
                'trial': i,
                # Upper bound follows bucket_size instead of a fixed 599.
                'bucket': f"{bucket_start}-{bucket_start + bucket_size - 1}",
                'count': count,
                'bucket_start': bucket_start,
            })
    # Naming the columns keeps sort_values from raising KeyError on empty
    # input; a stable sort makes the row order deterministic.
    df = pd.DataFrame(data, columns=columns)
    return df.sort_values('bucket_start', kind='stable')
# Run the simulation and derive the summary statistics and plotting frame.
results = run_simulation(filename)
summary = analyze_results(results)
df = results_to_dataframe(results)

# Reference count per bucket label, overlaid on the simulated distribution.
specified_counts = {
    "0-599": 13,
    "600-1199": 18,
    "1200-1799": 8,
    "1800-2399": 10,
    "2400-2999": 11,
    "3000-3599": 13,
    "3600-4199": 4,
    "4200-4799": 2
}

# Fix the category order explicitly so the overlay cannot drift out of
# alignment with the boxes: simulated buckets in DataFrame order (already
# sorted by bucket start), followed by any reference label that never
# occurred in the simulation.
bucket_order = list(dict.fromkeys(df['bucket']))
bucket_order += [label for label in specified_counts if label not in bucket_order]

sns.set(style="whitegrid")
plt.figure(figsize=(12, 6))
sns.boxenplot(x='bucket', y='count', data=df, order=bucket_order, palette="Set3")
# Derive the trial count from the data instead of hard-coding 1000.
plt.title(f'Boxen Plot of Raw Results for {len(results)} Simulations')
plt.xlabel('Bucket')
plt.ylabel('Count')
plt.xticks(rotation=45)

# Overlay specified_counts as red markers (linestyle='none': no connecting
# line). Categories are drawn at integer positions 0..n-1 in bucket_order,
# so each marker is placed by its label's index rather than by string.
x = [bucket_order.index(label) for label in specified_counts]
y = list(specified_counts.values())
plt.plot(x, y, color='red', marker='o', linestyle='none', linewidth=2, markersize=6)
plt.show()