# Untitled

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Toy dataset, written one house per row so each record reads left to right.
_COLUMNS = ["Area", "SquareFeet", "Rooms", "Cost"]
_ROWS = [
    ("A", 1500, 3, 300000),
    ("B", 2000, 4, 450000),
    ("A", 1200, 2, 250000),
    ("C", 1800, 3, 400000),
    ("B", 2500, 5, 550000),
    ("C", 2200, 4, 500000),
    ("A", 1600, 3, 320000),
    ("C", 2400, 4, 520000),
    ("B", 1900, 5, 480000),
    ("A", 1700, 3, 340000),
]

# Pivot the rows into a column-name -> list-of-values mapping.
house_data = {
    column: list(values) for column, values in zip(_COLUMNS, zip(*_ROWS))
}

houses = pd.DataFrame(data=house_data)

# Show the first five rows as a quick sanity check of the loaded data.
print("Initial Data Preview:")
print(houses.head(n=5))

# Number of houses recorded for each distinct area label.
area_counts = houses["Area"].value_counts()

# Bar chart: one bar per area, bar height = number of houses.
plt.bar(area_counts.index, area_counts.to_numpy())
plt.xlabel("Area")
plt.ylabel("Count")
plt.title("Distribution of Houses by Area")
plt.show()

# Pie chart of the same counts; each wedge is annotated with its percentage.
area_counts.plot(
    kind="pie",
    title="House Distribution by Area",
    autopct="%1.1f%%",
)
plt.ylabel("")  # clear the y-axis label
plt.show()

# Scatter plot of floor area (x) against cost (y), one translucent teal
# marker per house.
plt.scatter(
    x=houses["SquareFeet"],
    y=houses["Cost"],
    color="teal",
    alpha=0.7,
)
plt.xlabel("Square Feet")
plt.ylabel("Price")
plt.title("Square Footage vs. Cost")
plt.show()

# Predictors are floor area and room count; the target is the cost column.
features = houses.loc[:, ["SquareFeet", "Rooms"]]
prices = houses["Cost"]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    prices,
    random_state=42,
    test_size=0.2,
)

# Fit ordinary least squares on the training rows only.
regressor = LinearRegression().fit(X_train, y_train)

# Score the held-out rows and report the mean squared error.
predictions = regressor.predict(X_test)
error = mean_squared_error(y_true=y_test, y_pred=predictions)
print(f"Model Mean Squared Error: {error}")

# Side-by-side table of true vs. predicted values for the held-out rows.
results = pd.DataFrame(
    {
        "Actual Price": y_test,
        "Predicted Price": predictions,
    }
)
print("\nComparison of Actual and Predicted Prices:")
print(results)
# Editor is loading...