# Untitled
# House-price demo: build a small in-memory dataset, chart it, then fit and
# evaluate a linear regression of Cost on SquareFeet and Rooms.
import pandas as pd
import numpy as np  # not referenced in the visible script; kept as-is
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Ten sample houses; the four column lists are index-aligned with each other.
house_data = {
    "Area": ["A", "B", "A", "C", "B", "C", "A", "C", "B", "A"],
    "SquareFeet": [1500, 2000, 1200, 1800, 2500, 2200, 1600, 2400, 1900, 1700],
    "Rooms": [3, 4, 2, 3, 5, 4, 3, 4, 5, 3],
    "Cost": [
        300000, 450000, 250000, 400000, 550000,
        500000, 320000, 520000, 480000, 340000,
    ],
}
houses = pd.DataFrame(house_data)

print("Initial Data Preview:")
print(houses.head())

# --- Exploratory charts ----------------------------------------------------

# Houses per area; the same counts feed both the bar chart and the pie chart.
area_counts = houses["Area"].value_counts()

plt.bar(area_counts.index, area_counts.values)
plt.title("Distribution of Houses by Area")
plt.xlabel("Area")
plt.ylabel("Count")
plt.show()

area_counts.plot.pie(autopct="%1.1f%%", title="House Distribution by Area")
# Blank out the y-axis label so only the title annotates the pie.
plt.ylabel("")
plt.show()

plt.scatter(houses["SquareFeet"], houses["Cost"], alpha=0.7, color="teal")
plt.title("Square Footage vs. Cost")
plt.xlabel("Square Feet")
plt.ylabel("Price")
plt.show()

# --- Linear regression -----------------------------------------------------

# Only the numeric columns are used as predictors; "Area" is left out.
features = houses[["SquareFeet", "Rooms"]]
prices = houses["Cost"]

# test_size=0.2 holds out 2 of the 10 rows; the fixed random_state keeps the
# split (and therefore the reported error) reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    features, prices, test_size=0.2, random_state=42
)

regressor = LinearRegression()
regressor.fit(X_train, y_train)

predictions = regressor.predict(X_test)

error = mean_squared_error(y_test, predictions)
print(f"Model Mean Squared Error: {error}")

# y_test is a Series, so the comparison table keeps the original row index of
# each held-out house next to its predicted price.
results = pd.DataFrame({"Actual Price": y_test, "Predicted Price": predictions})
print("\nComparison of Actual and Predicted Prices:")
print(results)
# Leave a Comment