Untitled

 avatar
user_9363972
python
2 months ago
2.3 kB
2
Indexable
Never
import pickle
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import pandas as pd
import seaborn as sns
# Load the population table. The code below reads a "Country Name" column and
# a "2020" column from it.
df = pd.read_csv("population.csv")

# Rows whose 2020 value is one of the 20 largest 2020 values in the file.
population_2020 = df["2020"]
highest = population_2020.nlargest(20).tolist()
filtered_highest = df.loc[population_2020.isin(highest)]

# NOTE(review): the chart shows the last five rows of the file (df.tail()),
# not filtered_highest -- confirm which one was intended.
sns.barplot(data=df.tail(), x="Country Name", y="2020")
# Isolate the "World" row and reshape it into a long table with one row per
# year and two columns: 'year' and 'population'.
world_rows = df.loc[df['Country Name'] == 'World']
if len(world_rows) != 1:
    raise ValueError(
        "expected exactly one 'World' row in population.csv, "
        f"found {len(world_rows)}")

# drop() returns a new frame; dropping in place on a selection of df can
# trigger pandas' SettingWithCopyWarning.
world = world_rows.drop(
    columns=['Country Name', 'Country Code', 'Indicator Name',
             'Indicator Code'])

# The single remaining row becomes a Series indexed by the former column
# labels. Naming the two output columns explicitly avoids depending on the
# row's index label in the CSV (previously hard-coded as 259).
world = (
    world.iloc[0]
    .dropna()                           # discard columns with no value
    .rename_axis('year')
    .reset_index(name='population')
)

world.tail()  # notebook-style display; has no effect when run as a script

# Open a new figure so this chart is not drawn on top of the bar plot
# created earlier in the script.
plt.figure()
sns.lineplot(x='year', y='population', data=world.tail(10))
sns.scatterplot(x='year', y='population', data=world.tail(10))


# Coerce both columns to numeric dtypes before fitting.
world["population"] = pd.to_numeric(world["population"])
world["year"] = pd.to_numeric(world["year"])
x = world["year"]
y = world["population"]

# Hold out 20% of the rows for evaluation. The seed is fixed because, without
# random_state, every run draws a different split and therefore fits (and
# later pickles) a different model.
X_train, X_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42)

# Reshape every split to a column vector (n, 1). The targets are kept 2-D as
# well, so model.predict() returns a 2-D array; later code indexes it [0][0].
X_train = X_train.to_numpy().reshape(-1, 1)
X_test = X_test.to_numpy().reshape(-1, 1)
y_train = y_train.to_numpy().reshape(-1, 1)
y_test = y_test.to_numpy().reshape(-1, 1)

# Fit population as a linear function of year.
model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
y_pred  # notebook-style display; has no effect when run as a script

# Compare the held-out observations with the model's predictions for the
# same years.
# Start a new figure: without it, these artists are added to whatever axes
# are current, i.e. on top of the charts drawn earlier in the script.
plt.figure()

test_years = X_test.reshape(-1)
predicted_values = y_pred.reshape(-1)

plt.xlabel("Year")
plt.ylabel("Population")
plt.title("True vs Predicted ")
# Predictions: a line plus green crosses.
sns.lineplot(x=test_years, y=predicted_values)
sns.scatterplot(x=test_years, y=predicted_values,
                marker="x", color="green", s=200)
# Observed values: red dots.
sns.scatterplot(x=test_years, y=y_test.reshape(-1), color="red")


# Ask for one year and print the model's estimate for it.
year_input = int(input("Year : "))
# predict() takes a 2-D array: one sample with one feature.
predict_input = model.predict([[year_input]])
# The result is indexed as 2-D; int() drops the fractional part.
predicted_population = int(predict_input[0][0])
print(f"World Population Prediction in {year_input} is {predicted_population}")


# Persist the fitted model to 'model.pkl' (path is relative to the current
# working directory), overwriting any existing file.
with open('model.pkl', 'wb') as model_file:
    model_file.write(pickle.dumps(model))
    
# Forecast a run of consecutive years and chart the result.
# NOTE: the number entered is used as a COUNT of years to forecast, starting
# at 2021 -- it feeds range(), it is not treated as a target year.
year_input = int(input("Year ? "))
year = 2021  # first forecast year
years = list(range(year, year + year_input))

if years:
    # One batched predict() call instead of one call per year. The model
    # returns a 2-D array, so column 0 holds the predictions.
    forecast = model.predict([[forecast_year] for forecast_year in years])
    populations = list(forecast[:, 0])
else:
    # Zero or negative counts yield an empty table; predict() is skipped
    # because scikit-learn rejects an input with no samples.
    populations = []

data_predictions = pd.DataFrame({'year': years, 'population': populations})
data_predictions  # notebook-style display; has no effect when run as a script

# Start a new figure so the forecast is not drawn over earlier charts.
plt.figure()
sns.lineplot(x=data_predictions['year'], y=data_predictions['population'])
plt.xlabel("Year")
plt.ylabel("Population")
plt.title("Predictions")
Leave a Comment