Untitled

mail@pastecode.io avatar
unknown
plain_text
a year ago
1.2 kB
1
Indexable
Never
# Perform regression using the principle of least squares (OLS)
def OLS_regression(df):
    """Fit an OLS model of ``resolved`` on ``df`` and return the fitted values.

    The design matrix is built from the continuous columns
    ``percentage_clustering``, ``percentage_assumption``,
    ``percentage_extresearch``, ``percentage_complex`` and ``pth``, plus
    integer dummy columns for the categorical column ``mp`` (first level
    dropped), and is then passed through ``api.add_constant``.

    NOTE(review): ``pd`` and ``api`` are not imported in this block; ``api``
    is presumably ``statsmodels.api`` -- confirm against the file's imports.

    Args:
        df: DataFrame holding the columns listed above and ``resolved``.

    Returns:
        The result of ``results.predict(X)``, i.e. the in-sample predictions
        for the rows of ``df``.

    Side effects:
        Prints the row count, the fitted coefficients, the dataset(s) and the
        predictions to stdout.
    """
    continuous_cols = [
        'percentage_clustering',
        'percentage_assumption',
        'percentage_extresearch',
        'percentage_complex',
        'pth',
    ]

    # One-hot encode the categorical variable, asking for integer dummies
    # directly instead of creating booleans and converting them afterwards.
    dummy_cat_mp = pd.get_dummies(
        df['mp'], prefix='mp', drop_first=True, dtype=int
    )

    # Combine continuous and dummy variables, then add the constant column.
    X = pd.concat([df[continuous_cols], dummy_cat_mp], axis=1)
    X = api.add_constant(X)
    y = df[['resolved']]

    results = api.OLS(y, X).fit()

    # Regression coefficients of the fitted model.
    coefficients = results.params
    print('The Length of Considered Dataframe ', len(df))
    print(coefficients, "\n\n")

    # `dataframe` is not a parameter or local of this function; it is only
    # available if the surrounding module/notebook defines it. Look it up
    # defensively so the regression still runs (instead of raising NameError)
    # when that global is absent.
    complete_dataset = globals().get('dataframe')
    if complete_dataset is not None:
        print('The Complete Dataset => ', complete_dataset)

    print('The under R Squared Dataset => ', df)

    # Predict once and reuse the result for both the printout and the return.
    predictions = results.predict(X)
    print(predictions)
    return predictions
    
# Import-time confirmation that OLS_regression has been defined.
print(
    "Function created OLS_regression(df) "
    "to generate OLS regression on input Data"
)