Untitled
unknown
plain_text
2 years ago
1.2 kB
5
Indexable
# Perform regression by the principle of least squares.
def OLS_regression(df):
    """Fit an OLS model on ``df`` and return its in-sample predictions.

    The design matrix is built from five continuous columns
    (``percentage_clustering``, ``percentage_assumption``,
    ``percentage_extresearch``, ``percentage_complex``, ``pth``) plus the
    dummy-encoded ``mp`` column (its first level is dropped), with a
    constant column added. The response is the ``resolved`` column.

    Besides fitting, the function prints the row count of ``df``, the
    fitted coefficients, the module-level ``dataframe`` object, ``df``
    itself, and the predictions.

    NOTE(review): ``api`` is presumably ``statsmodels.api`` imported under
    that name, and ``dataframe`` is a global that is not defined in this
    block -- confirm both exist in the calling module.

    Args:
        df: pandas DataFrame containing the columns named above.

    Returns:
        The value of ``results.predict(X)`` for the rows of ``df``.

    Raises:
        KeyError: If any of the required columns is missing from ``df``.
        NameError: If no global named ``dataframe`` exists at call time.
    """
    continuous_cols = [
        'percentage_clustering',
        'percentage_assumption',
        'percentage_extresearch',
        'percentage_complex',
        'pth',
    ]

    # One-hot encode the categorical variable; drop_first avoids a dummy
    # column set that is perfectly collinear with the added constant.
    dummy_cat_mp = pd.get_dummies(
        df['mp'], prefix='mp', drop_first=True, dtype=int
    )

    # Combine continuous and dummy variables, then add the intercept column.
    X = pd.concat([df[continuous_cols], dummy_cat_mp], axis=1)
    X = api.add_constant(X)
    y = df[['resolved']]

    results = api.OLS(y, X).fit()

    # Regression coefficients of the fitted model.
    coefficients = results.params
    print('The Length of Considered Dataframe ', len(df))
    print(coefficients, "\n\n")
    # NOTE(review): `dataframe` is read from module scope, not from `df`.
    print('The Complete Dataset => ', dataframe)
    print('The under R Squared Dataset => ', df)

    # Predict once and reuse the result for both the print and the return.
    predictions = results.predict(X)
    print(predictions)
    return predictions
# Notify the user that OLS_regression is now defined.
print("Function created OLS_regression(df) to generate OLS regression on input Data")