# Untitled

# mail@pastecode.io avatar
# unknown
# plain_text
# 7 months ago
# 3.5 kB
# 1
# Indexable
# Never
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import KFold
from sklearn.metrics import roc_auc_score

def logistic_regression(X, y, eta, iterations):
    """Fit a linear logistic model with per-sample (stochastic) gradient steps.

    Each step moves ``w`` along the negative gradient of the logistic loss
    ``log(1 + exp(-y_i * <x_i, w>))`` for a single sample, visiting the
    samples in their stored order on every pass.

    Args:
        X: 2-D array of shape (m, n); one sample per row.
        y: Length-m sequence of labels; the update rule assumes values
            in {-1, +1}.
        eta: Step size applied to every per-sample gradient.
        iterations: Number of full passes over the samples.

    Returns:
        The learned weight vector of length n (no intercept term).
    """
    m, n = X.shape
    w = np.zeros(n)  # Start from the all-zero weight vector

    for _ in range(iterations):
        for i in range(m):
            margin = y[i] * np.dot(X[i], w)
            # 1 / (1 + exp(margin)) computed as exp(-log(1 + exp(margin))):
            # logaddexp never evaluates exp(margin) directly, so a large
            # positive margin cannot overflow.
            factor = np.exp(-np.logaddexp(0.0, margin))
            w += eta * factor * y[i] * X[i]

    return w

def _cross_validated_auc(X, y, cselection, eta, iteration):
    """Return the mean validation ROC AUC over the folds of ``cselection``."""
    fold_scores = []
    for train_index, val_index in cselection.split(X):
        # Train on the training part of the fold only.
        w = logistic_regression(X[train_index], y[train_index], eta, iteration)

        # Raw linear scores are passed to roc_auc_score as the prediction.
        y_pred = np.dot(X[val_index], w)
        fold_scores.append(roc_auc_score(y[val_index], y_pred))

    return sum(fold_scores) / len(fold_scores)


def main(iworkmode):
    """Run a 5-fold cross-validated hyper-parameter sweep and print the best.

    Loads the breast cancer data set, maps the labels to {-1, +1}, scales
    every feature by its maximum absolute value, and evaluates
    ``logistic_regression`` by mean ROC AUC for each candidate setting.

    Args:
        iworkmode: Scenario selector.
            0 -- step size enumeration (Question 3): eta in 0.1, 0.2, ..., 1.0
                 with 50 iterations.
            1 -- iteration number enumeration (Question 4): 10, 20, ..., 100
                 iterations with eta = 0.1.

    Raises:
        ValueError: If ``iworkmode`` is neither 0 nor 1.
    """
    iscenario = iworkmode
    if iscenario not in (0, 1):
        raise ValueError(
            f"iworkmode must be 0 (step size) or 1 (iteration number), "
            f"got {iworkmode!r}"
        )

    # load the data
    X, y = load_breast_cancer(return_X_y=True)  ## X input, y output

    # Map the {0, 1} labels to {-1, +1}.
    y = 2 * y - 1

    # Each candidate is an (eta, iteration) pair; only one of the two varies.
    if iscenario == 0:  ## Question 3, step size enumeration
        neta = 10   ## number of different step sizes
        eta0 = 0.1  ## first step size
        candidates = [(eta0 * (i + 1), 50) for i in range(neta)]
    else:  ## Question 4, iteration number enumeration
        niteration = 10  ## number of different iteration
        iteration0 = 10  ## first iteration number
        candidates = [(0.1, iteration0 * (i + 1)) for i in range(niteration)]

    nfold = 5  ## number of folds

    # Has no effect on the unshuffled KFold below; kept so the global RNG
    # state stays the same as before for any code that runs afterwards.
    np.random.seed(12345)

    cselection = KFold(n_splits=nfold, random_state=None, shuffle=False)

    # Scale every column by its largest absolute value; an all-zero column
    # is left untouched instead of being divided by zero.
    col_scale = np.max(np.abs(X), axis=0)
    col_scale[col_scale == 0] = 1.0
    X = X / col_scale

    max_avg_score = -1  # Initialize with a low value
    best_eta = None
    best_iteration = None

    for eta, iteration in candidates:
        avg_score = _cross_validated_auc(X, y, cselection, eta, iteration)

        # Strict comparison: on ties the earliest candidate wins.
        if avg_score > max_avg_score:
            max_avg_score = avg_score
            best_eta = eta
            best_iteration = iteration

    print(f"Maximum average score: {max_avg_score:.2f}")
    if iscenario == 0:
        print(f"Corresponding step size: {best_eta:.2f}")
    else:
        print(f"Corresponding iteration number: {best_iteration}")

if __name__ == "__main__":
    # The argument is intended as the scenario selector:
    # 0 for step size, 1 for iteration number.
    main(iworkmode=0)