Untitled

def compute_gradients(x: np.ndarray, prediction: np.ndarray, target: np.ndarray) -> "tuple[np.ndarray, np.ndarray]":
    """
    Computes the gradient of the mean softmax cross-entropy loss w.r.t. the
    parameters (weights W and bias b) of a linear classifier z = x @ W.T + b.

    With P the predicted class probabilities and Y the one-hot encoded
    targets, the gradients are

        grad_W = (P - Y).T @ x / batch_size
        grad_b = (P - Y).sum(axis=0) / batch_size

    The returned values are the gradient itself (direction of steepest
    increase of the loss); a gradient-descent step subtracts them.

    Args:
        x (np.ndarray): Numpy array of shape [batch size x num_features]
        prediction (np.ndarray): Numpy array of shape [batch size x num_classes]
                                 holding the softmax probabilities
        target (np.ndarray): Numpy array of shape [batch size, ] holding the
                             integer class index of each sample. A target that
                             already has the same shape as `prediction`
                             (e.g. one-hot encoded) is used as-is.

    Returns:
        grad_W (np.ndarray): Numpy array of shape [num_classes x num_features]
                             i.e. same as the weights matrix
        grad_b (np.ndarray): Numpy array of shape [num_classes, ]

    Raises:
        ValueError: if the batch is empty, if the number of labels does not
                    match the batch size, or if a label lies outside
                    [0, num_classes).
    """
    prediction = np.asarray(prediction)
    target = np.asarray(target)

    batch_size, num_classes = prediction.shape
    if batch_size == 0:
        raise ValueError("cannot compute gradients for an empty batch")

    if target.shape == prediction.shape:
        # Target is already expressed per class (one-hot / soft labels).
        error = prediction - target
    else:
        labels = target.reshape(-1).astype(np.intp)
        if labels.shape[0] != batch_size:
            raise ValueError(
                f"expected {batch_size} target labels, got {labels.shape[0]}"
            )
        # Negative indices would silently wrap around with fancy indexing,
        # so the range is checked explicitly.
        if labels.min() < 0 or labels.max() >= num_classes:
            raise ValueError(
                f"target labels must lie in [0, {num_classes}), "
                f"got range [{labels.min()}, {labels.max()}]"
            )
        # P - one_hot(target): copy the probabilities and subtract 1 at the
        # true-class position of each row, without building the one-hot matrix.
        error = np.array(prediction, dtype=float)
        error[np.arange(batch_size), labels] -= 1.0

    # Softmax extension of the logistic-regression gradient: the error term
    # (P - Y) is back-projected onto the inputs and averaged over the batch.
    grad_W = error.T @ x / batch_size
    grad_b = error.sum(axis=0) / batch_size

    return grad_W, grad_b
Editor is loading...