# Untitled

import pandas as pd
import numpy as np

# Number of respondents
num_respondents = 384

# Workbook written when this file is executed as a script
OUTPUT_PATH = "MSPO_Dummy_Data_384_Respondents.xlsx"


def _draw_religion(race):
    """Return one religion label drawn conditionally on *race*.

    "Malay" always maps to "Islam" and consumes no random number; every
    other race consumes exactly one draw from NumPy's global generator.
    """
    if race == "Malay":
        return "Islam"
    if race == "Chinese":
        options, weights = ["Buddha", "Christian", "Other"], [0.6, 0.3, 0.1]
    elif race == "India":
        options, weights = ["Hindu", "Christian", "Other"], [0.7, 0.2, 0.1]
    else:
        # Any remaining race: uniform over all listed religions.
        options, weights = ["Islam", "Buddha", "Christian", "Hindu", "Other"], None
    # np.random.choice returns np.str_; normalise so the column holds plain str.
    return str(np.random.choice(options, p=weights))


def build_dummy_data(n_rows: int = 384, seed: int = 42) -> pd.DataFrame:
    """Build the synthetic MSPO smallholder survey dataset.

    Args:
        n_rows: Number of respondents (rows) to generate.
        seed: Seed passed to ``np.random.seed``; note that this reseeds
            NumPy's global legacy generator.

    Returns:
        A DataFrame with one row per respondent and 16 columns
        (demographics, income, household, employment, production,
        fertilizer cost and yield figures).
    """
    # Set seed for reproducibility
    np.random.seed(seed)

    # The draws below are made in a fixed order; reordering them changes
    # every value produced for a given seed.
    data = {
        "Gender": np.random.choice(["Male", "Female"], size=n_rows),
        "Age": np.random.randint(25, 65, size=n_rows),  # upper bound exclusive: 25..64
        "Education_Level": np.random.choice(
            ["Primary school", "Secondary school", "Diploma, Certificate", "Bachelor, Master, PhD", "No Formal Education"],
            p=[0.15, 0.4, 0.25, 0.15, 0.05],
            size=n_rows,
        ),
        "Marital_Status": np.random.choice(
            ["Single", "Married", "Divorced", "Others"],
            p=[0.3, 0.6, 0.08, 0.02],
            size=n_rows,
        ),
        "Race": np.random.choice(
            ["Malay", "Chinese", "India", "Other"],
            p=[0.6, 0.2, 0.15, 0.05],
            size=n_rows,
        ),
    }

    # Map religion based on race
    data["Religion"] = [_draw_religion(race) for race in data["Race"]]

    data.update({
        "Monthly_Income_RM": np.random.normal(2500, 500, size=n_rows).astype(int),
        # Poisson(4) yields 0 for roughly 2% of draws; a respondent's
        # household has at least one member, so floor the value at 1.
        "Household_Size": np.maximum(np.random.poisson(4, size=n_rows), 1),
        "Employment_Status": np.random.choice(
            ["Smallholder Full-time", "Government servant", "Private sector", "Retired", "Self-employed", "Others"],
            p=[0.7, 0.05, 0.1, 0.05, 0.08, 0.02],
            size=n_rows,
        ),
        "MSPO_Certification": ["Yes"] * n_rows,
        "Average_Production_Tonnes": np.round(np.random.normal(10, 2, size=n_rows), 1).clip(5, 20),
        "Production_Profit_RM": np.random.normal(5000, 1000, size=n_rows).astype(int).clip(3000, 10000),
        "Cost_Fertilizer_Before": np.random.normal(500, 50, size=n_rows).astype(int).clip(300, 800),
        "Cost_Fertilizer_After": np.random.normal(450, 50, size=n_rows).astype(int).clip(300, 700),
        "Yield_Before_Tonnes": np.round(np.random.normal(8, 1.5, size=n_rows), 1).clip(5, 15),
        "Yield_After_Tonnes": np.round(np.random.normal(10, 1.5, size=n_rows), 1).clip(7, 20),
    })

    return pd.DataFrame(data)


# Create DataFrame
df = build_dummy_data(num_respondents)

if __name__ == "__main__":
    # Save to Excel (only when run as a script, not on import)
    df.to_excel(OUTPUT_PATH, index=False)
# Editor is loading...