Untitled
unknown
plain_text
7 months ago
3.3 kB
13
Indexable
Never
import random from collections import defaultdict import numpy as np import seaborn as sns import matplotlib.pyplot as plt def load_numbers_from_file(filename): with open(filename, 'r') as f: numbers = [int(line.strip()) for line in f] return numbers def count_selections_in_buckets(selections, bucket_size=600): counts = defaultdict(int) for number in selections: bucket = number // bucket_size counts[bucket] += 1 return counts def run_simulation(filename, num_trials=1000, num_selections=89, bucket_size=600): all_numbers = load_numbers_from_file(filename) results = [] for _ in range(num_trials): selected_numbers = random.sample(all_numbers, num_selections) bucket_counts = count_selections_in_buckets(selected_numbers, bucket_size) results.append(bucket_counts) return results def analyze_results(results, bucket_size=600): bucket_totals = defaultdict(list) for trial in results: for bucket, count in trial.items(): bucket_totals[bucket].append(count) summary = {} for bucket, counts in bucket_totals.items(): mean = np.mean(counts) std_dev = np.std(counts) bucket_range = (min(counts), max(counts)) p25, p75, p90 = np.percentile(counts, [25, 75, 90]) summary[bucket] = { 'mean_percentage': mean / 89 * 100, 'std_dev': std_dev, 'range': bucket_range, 'p25': p25, 'p75': p75, 'p90': p90 } return summary def results_to_dataframe(results, bucket_size=600): data = [] for i, trial in enumerate(results): for bucket, count in trial.items(): data.append({'trial': i, 'bucket': f"{bucket * bucket_size}-{bucket * bucket_size + 599}", 'count': count}) df = pd.DataFrame(data) return df filename = 'inputnumnum' # Replace this with your numbers file This is all the people who selected beeple as first choice results = run_simulation(filename) summary = analyze_results(results) df = results_to_dataframe(results) def results_to_dataframe(results, bucket_size=600): data = [] for i, trial in enumerate(results): for bucket, count in trial.items(): bucket_start = bucket * bucket_size data.append({ 'trial': i, 'bucket': f"{bucket_start}-{bucket_start + 599}", 'count': count, 'bucket_start': bucket_start }) df = pd.DataFrame(data) df = df.sort_values('bucket_start') return df results = run_simulation(filename) summary = analyze_results(results) df = results_to_dataframe(results) specified_counts = { "0-599": 13, "600-1199": 18, "1200-1799": 8, "1800-2399": 10, "2400-2999": 11, "3000-3599": 13, "3600-4199": 4, "4200-4799": 2 } sns.set(style="whitegrid") plt.figure(figsize=(12, 6)) sns.boxenplot(x='bucket', y='count', data=df, palette="Set3") plt.title('Boxen Plot of Raw Results for 1000 Simulations') plt.xlabel('Bucket') plt.ylabel('Count') plt.xticks(rotation=45) # Plot the red line for specified_counts x = list(specified_counts.keys()) y = list(specified_counts.values()) plt.plot(x, y, color='red', marker='o', linestyle='none', linewidth=2, markersize=6) plt.show()