Untitled
unknown
plain_text
5 months ago
2.4 kB
3
Indexable
"""Generate a reproducible dummy survey dataset and save it to Excel.

Running this file as a script builds a 384-row table of randomly drawn
respondent attributes (demographics, income, production, cost and yield
figures) and writes it to ``MSPO_Dummy_Data_384_Respondents.xlsx``.
"""

import numpy as np
import pandas as pd

# Defaults used when the file is run as a script.
NUM_RESPONDENTS = 384
SEED = 42
OUTPUT_PATH = "MSPO_Dummy_Data_384_Respondents.xlsx"

# Race-specific religion distributions as (options, probabilities).
# "Malay" is handled separately (always "Islam"); any race not listed here
# falls back to a uniform draw over _RELIGION_FALLBACK.
_RELIGION_BY_RACE = {
    "Chinese": (["Buddha", "Christian", "Other"], [0.6, 0.3, 0.1]),
    "India": (["Hindu", "Christian", "Other"], [0.7, 0.2, 0.1]),
}
_RELIGION_FALLBACK = ["Islam", "Buddha", "Christian", "Hindu", "Other"]


def _draw_religion(race, rng):
    """Return a religion label for one respondent, conditioned on *race*."""
    if race == "Malay":
        return "Islam"
    if race in _RELIGION_BY_RACE:
        options, weights = _RELIGION_BY_RACE[race]
        # choice() returns a NumPy string scalar; store a plain str instead.
        return str(rng.choice(options, p=weights))
    return str(rng.choice(_RELIGION_FALLBACK))


def generate_dummy_data(
    num_respondents: int = NUM_RESPONDENTS, seed: int = SEED
) -> pd.DataFrame:
    """Build the dummy respondent table.

    Args:
        num_respondents: Number of rows to generate.
        seed: Seed for the random number generator; the same seed and
            row count always give the same table.

    Returns:
        A DataFrame with one row per respondent and 16 columns.
    """
    # A local RandomState keeps the draws reproducible without reseeding
    # NumPy's global generator. The order of the draws below determines the
    # generated values, so keep it stable.
    rng = np.random.RandomState(seed)
    n = num_respondents

    data = {
        "Gender": rng.choice(["Male", "Female"], size=n),
        # randint's upper bound is exclusive: ages are 25..64.
        "Age": rng.randint(25, 65, size=n),
        "Education_Level": rng.choice(
            [
                "Primary school",
                "Secondary school",
                "Diploma, Certificate",
                "Bachelor, Master, PhD",
                "No Formal Education",
            ],
            p=[0.15, 0.4, 0.25, 0.15, 0.05],
            size=n,
        ),
        "Marital_Status": rng.choice(
            ["Single", "Married", "Divorced", "Others"],
            p=[0.3, 0.6, 0.08, 0.02],
            size=n,
        ),
        "Race": rng.choice(
            ["Malay", "Chinese", "India", "Other"],
            p=[0.6, 0.2, 0.15, 0.05],
            size=n,
        ),
    }

    # Religion depends on race, so it is drawn row by row.
    data["Religion"] = [_draw_religion(race, rng) for race in data["Race"]]

    data.update(
        {
            "Monthly_Income_RM": rng.normal(2500, 500, size=n).astype(int),
            # A Poisson draw can be 0; floor at 1 so no respondent ends up
            # with an empty household.
            "Household_Size": np.maximum(rng.poisson(4, size=n), 1),
            "Employment_Status": rng.choice(
                [
                    "Smallholder Full-time",
                    "Government servant",
                    "Private sector",
                    "Retired",
                    "Self-employed",
                    "Others",
                ],
                p=[0.7, 0.05, 0.1, 0.05, 0.08, 0.02],
                size=n,
            ),
            "MSPO_Certification": ["Yes"] * n,
            # The numeric columns below are normal draws clipped to a
            # fixed plausible range.
            "Average_Production_Tonnes": np.round(
                rng.normal(10, 2, size=n), 1
            ).clip(5, 20),
            "Production_Profit_RM": rng.normal(5000, 1000, size=n)
            .astype(int)
            .clip(3000, 10000),
            "Cost_Fertilizer_Before": rng.normal(500, 50, size=n)
            .astype(int)
            .clip(300, 800),
            "Cost_Fertilizer_After": rng.normal(450, 50, size=n)
            .astype(int)
            .clip(300, 700),
            "Yield_Before_Tonnes": np.round(
                rng.normal(8, 1.5, size=n), 1
            ).clip(5, 15),
            "Yield_After_Tonnes": np.round(
                rng.normal(10, 1.5, size=n), 1
            ).clip(7, 20),
        }
    )

    return pd.DataFrame(data)


def main() -> None:
    """Generate the default dataset and write it to OUTPUT_PATH."""
    df = generate_dummy_data()
    df.to_excel(OUTPUT_PATH, index=False)


if __name__ == "__main__":
    main()
Editor is loading...
Leave a Comment