Untitled
unknown
plain_text
2 years ago
1.2 kB
4
Indexable
# Performing regression by the principle of least squares
def OLS_regression(df):
    """Fit an OLS model of ``resolved`` on the predictor columns of *df*.

    The predictors are the continuous columns ``percentage_clustering``,
    ``percentage_assumption``, ``percentage_extresearch``,
    ``percentage_complex`` and ``pth``, plus 0/1 dummy columns derived from
    the categorical column ``mp``. The design matrix is passed through
    ``api.add_constant`` before fitting (presumably ``statsmodels.api``,
    which would add the intercept column -- confirm against the file's
    imports).

    Side effects: prints the row count of *df*, the fitted coefficients,
    the module-level ``dataframe``, *df* itself and the fitted values.

    Args:
        df: DataFrame containing the columns ``mp``, ``resolved`` and the
            five continuous predictor columns listed above.

    Returns:
        The result of ``results.predict(X)``, i.e. the model's predictions
        for the same rows it was fitted on.
    """
    # One-hot encode the categorical variable straight to 0/1 integers.
    # drop_first=True drops the first level, so k categories give k-1 dummies.
    dummy_cat_mp = pd.get_dummies(df['mp'], prefix='mp', drop_first=True, dtype=int)

    # Combine continuous and dummy variables into the design matrix.
    continuous_columns = [
        'percentage_clustering',
        'percentage_assumption',
        'percentage_extresearch',
        'percentage_complex',
        'pth',
    ]
    X = pd.concat([df[continuous_columns], dummy_cat_mp], axis=1)
    X = api.add_constant(X)
    y = df[['resolved']]

    model = api.OLS(y, X)
    results = model.fit()

    # Get the regression coefficients
    coefficients = results.params
    print('The Length of Considered Dataframe ', len(df))
    print(coefficients, "\n\n")
    # NOTE(review): `dataframe` is neither a parameter nor a local; this line
    # relies on a module-level variable of that name existing at call time and
    # raises NameError otherwise -- confirm it is defined by the caller's module.
    print('The Complete Dataset => ', dataframe)
    print('The under R Squared Dataset => ', df)

    # Predict once and reuse the result for both the printout and the return.
    predictions = results.predict(X)
    print(predictions)
    return predictions


print("Function created OLS_regression(df) to generate OLS regression on input Data")
Editor is loading...