Untitled
unknown
plain_text
a year ago
4.6 kB
7
Indexable
# Four ML assignment scripts (Uber fare regression, e-mail spam
# classification, gradient descent, K-Means clustering), reconstructed
# from a whitespace-mangled paste into one runnable module.
#
# Fixes vs. the original paste:
#   * Assignment 1 read the CSV into `mydata` but used `md` everywhere
#     after that (NameError); the frame is now consistently named `md`.
#   * Assignment 4's three plot-label strings were interleaved by the
#     paste; restored to 'Elbow Method for Optimal k' /
#     'Number of Clusters (k)' / 'Sum of Squared Distances'.
#   * Each assignment is wrapped in a function behind a __main__ guard,
#     so importing this file has no side effects; running it as a
#     script still executes all four in order.

import numpy as np
import pandas as pd


def objective(x):
    """Quadratic objective f(x) = (x + 3)^2, minimised at x = -3."""
    return (x + 3) ** 2


def derivative(x):
    """Analytic gradient of `objective`: f'(x) = 2 * (x + 3)."""
    return 2 * (x + 3)


def gradientDesc(alpha, start, num_iters):
    """Run `num_iters` gradient-descent steps on `objective`.

    Parameters
    ----------
    alpha : float
        Learning rate.
    start : float
        Initial value of x.
    num_iters : int
        Number of update steps (renamed from `iter`, which shadowed
        the builtin).

    Returns
    -------
    list
        The visited x values, length `num_iters + 1` (includes `start`).
    """
    x = start
    x_list = [x]
    for _ in range(num_iters):
        x = x - alpha * derivative(x)
        x_list.append(x)
    return x_list


def run_uber_fare_regression():
    """Assignment 1: predict Uber fares with linear regression and a random forest."""
    import matplotlib.pyplot as plt
    from sklearn.model_selection import train_test_split
    from sklearn.linear_model import LinearRegression
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.metrics import r2_score, mean_squared_error

    # BUG FIX: the original assigned to `mydata` but referenced `md`.
    md = pd.read_csv('uber.csv')
    md['pickup_datetime'] = pd.to_datetime(md['pickup_datetime'])
    md.dropna(inplace=True)
    corr_mat = md.corr(numeric_only=True)  # exploratory correlation matrix
    plt.boxplot(md['fare_amount'])

    # Remove outliers using the interquartile range.
    q1 = md['fare_amount'].quantile(0.25)
    q3 = md['fare_amount'].quantile(0.75)
    iqr = q3 - q1
    lower_bound = q1 - 1.5 * iqr
    upper_bound = q3 + 1.5 * iqr
    md = md[(md['fare_amount'] > lower_bound) & (md['fare_amount'] < upper_bound)]
    plt.boxplot(md['fare_amount'])

    y = md['fare_amount']
    x = md[['pickup_longitude', 'pickup_latitude',
            'dropoff_longitude', 'dropoff_latitude', 'passenger_count']]
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

    lr_model = LinearRegression()
    lr_model.fit(x_train, y_train)
    lr_ypredict = lr_model.predict(x_test)
    lr_r2score = r2_score(y_test, lr_ypredict)
    lr_rmse = mean_squared_error(y_test, lr_ypredict, squared=False)  # RMSE
    print("Linear regression R2 score : ", lr_r2score)

    rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
    rf_model.fit(x_train, y_train)
    rf_ypredict = rf_model.predict(x_test)
    rf_r2score = r2_score(y_test, rf_ypredict)
    rf_rmse = mean_squared_error(y_test, rf_ypredict, squared=False)  # RMSE
    print("Random forest regression R2 score : ", rf_r2score)
    print("Random forest regression RMSE:", rf_rmse)
    print("Linear Regression RMSE:", lr_rmse)


def run_email_spam_classification():
    """Assignment 2: classify spam e-mails with KNN and an RBF-kernel SVM."""
    from sklearn.model_selection import train_test_split
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.svm import SVC
    from sklearn.metrics import accuracy_score

    data = pd.read_csv("emails.csv")
    # Columns 1..2999 are the feature columns; the last column is the
    # label (assumed spam/not-spam -- verify against the dataset).
    x = data.iloc[:, 1:3000]
    y = data.iloc[:, -1].values
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

    knn_classifier = KNeighborsClassifier(n_neighbors=5)
    knn_classifier.fit(x_train, y_train)
    knn_y_pred = knn_classifier.predict(x_test)
    print("KNN Accuracy:", accuracy_score(y_test, knn_y_pred))

    svm_classifier = SVC(kernel='rbf')
    svm_classifier.fit(x_train, y_train)
    svm_y_pred = svm_classifier.predict(x_test)
    print("SVM Accuracy:", accuracy_score(y_test, svm_y_pred))


def run_gradient_descent_demo():
    """Assignment 3: visualise gradient descent on (x + 3)^2."""
    import matplotlib.pyplot as plt

    alpha = 0.01
    start = 5
    num_iters = 200

    x_cor = np.linspace(-15, 15, 100)
    plt.plot(x_cor, objective(x_cor))
    plt.plot(start, objective(start), 'ro')  # mark the starting point

    x_list = np.array(gradientDesc(alpha, start, num_iters))
    x_cor = np.linspace(-5, 5, 100)
    plt.plot(x_cor, objective(x_cor))
    plt.plot(x_list, objective(x_list), '.-', color='pink')
    plt.show()
    print(x_list[-1])  # final iterate; should be close to the minimum at -3


def run_kmeans_clustering():
    """Assignment 4: elbow-method K-Means clustering of a sales dataset."""
    import matplotlib.pyplot as plt
    from sklearn.cluster import KMeans

    data = pd.read_csv('sample.csv', encoding='unicode-escape')
    data.info()
    features = data[['QUANTITYORDERED', 'PRICEEACH', 'SALES', 'MSRP',
                     'YEAR_ID', 'QTR_ID', 'MONTH_ID']]
    features.isna().sum()  # inspect missing values before clustering

    sse = []  # sum of squared errors (model inertia) for each k
    for k in range(1, 11):
        kmeans = KMeans(n_clusters=k, random_state=42)
        kmeans.fit(features)
        sse.append(kmeans.inertia_)

    # FIX: the paste interleaved the title/xlabel/ylabel strings;
    # the labels below are the de-interleaved originals.
    plt.plot(range(1, 11), sse, marker='x')
    plt.title('Elbow Method for Optimal k')
    plt.xlabel('Number of Clusters (k)')
    plt.ylabel('Sum of Squared Distances')
    plt.text(2.5, 0.4, 'Optimal k')
    plt.show()

    optimal_k = 6  # chosen from the elbow plot; adjust per dataset
    kmeans_optimal = KMeans(n_clusters=optimal_k, random_state=42)
    clusters = kmeans_optimal.fit_predict(features)
    print(clusters)

    plt.scatter(features['QUANTITYORDERED'], features['SALES'], c=clusters)
    plt.title('K-Means Clustering')
    plt.xlabel('Quantity Ordered')
    plt.ylabel('Sales')
    plt.show()


if __name__ == "__main__":
    run_uber_fare_regression()
    run_email_spam_classification()
    run_gradient_descent_demo()
    run_kmeans_clustering()
Editor is loading...
Leave a Comment