linucb
unknown
python
19 days ago
2.0 kB
8
Indexable
class ControllerLayer:
    """LinUCB contextual bandit that selects an ``(alpha, beta)`` weight pair.

    Every combination of ``alpha_values`` x ``beta_values`` is one arm. Each
    arm keeps its own ridge-regression statistics ``A`` (d x d) and ``b``
    (d x 1); the arm with the highest upper confidence bound for the current
    context is chosen.

    NOTE(review): the default feature extractor is built with dropout=0.3.
    If it is left in training mode, ``get_weights`` and ``update`` may see
    different features for the same state -- confirm it is run in eval mode.
    """

    def __init__(self, feature_extractor=None, *, d=64):
        """Set up the arm grid and the per-arm LinUCB statistics.

        Args:
            feature_extractor: Callable mapping raw state features to a
                length-``d`` context vector. When ``None`` the project's
                ``MLPFeatureExtractor`` is built with the original settings.
            d: Dimension of the context vector (must match the extractor
                output).
        """
        # Arm grid. ``np.linspace`` takes the NUMBER of samples as its third
        # argument, so the original ``0.5`` raised TypeError; use 5 evenly
        # spaced values (0.2, 0.4, 0.6, 0.8, 1.0) per axis -> 25 arms.
        self.alpha_values = np.linspace(0.2, 1.0, 5)
        self.beta_values = np.linspace(0.2, 1.0, 5)
        self.arm_count = len(self.alpha_values) * len(self.beta_values)

        # LinUCB parameters
        self.alpha_linucb = 0.5  # Exploration parameter
        self.d = d  # Feature dimension

        # Per-arm statistics: A starts as the identity (ridge prior), b as 0.
        self.A = [np.identity(self.d) for _ in range(self.arm_count)]
        self.b = [np.zeros((self.d, 1)) for _ in range(self.arm_count)]

        # Index of the arm chosen by the most recent get_weights() call, so
        # the caller has something to pass to update().
        self.last_arm = None

        # Feature extractor
        if feature_extractor is None:
            feature_extractor = MLPFeatureExtractor(
                input_dim=128,
                hidden_dims=[128, 64],
                output_dim=self.d,
                dropout=0.3,
            )
        self.feature_extractor = feature_extractor

    def _context(self, state_features):
        """Run the feature extractor and return the context as a (d, 1) array."""
        feats = self.feature_extractor(state_features)
        # Presumably the default extractor returns a torch tensor; detach it
        # so it can be converted to NumPy -- TODO confirm against the
        # MLPFeatureExtractor implementation.
        if hasattr(feats, "detach"):
            feats = feats.detach().cpu().numpy()
        x = np.asarray(feats, dtype=float).reshape(-1, 1)
        if x.shape[0] != self.d:
            raise ValueError(
                f"expected {self.d} context features, got {x.shape[0]}"
            )
        return x

    def get_weights(self, state_features):
        """Return the (alpha, beta) weights of the arm with the highest UCB.

        The chosen arm index is also stored in ``self.last_arm``.
        """
        x = self._context(state_features)

        # Compute the UCB score of every arm for this context.
        ucb_scores = np.empty(self.arm_count)
        for arm, (A_arm, b_arm) in enumerate(zip(self.A, self.b)):
            A_inv = np.linalg.inv(A_arm)
            theta = A_inv @ b_arm
            mean = (theta.T @ x).item()
            # Clamp at 0 so rounding noise cannot produce sqrt of a negative.
            variance = max((x.T @ A_inv @ x).item(), 0.0)
            ucb_scores[arm] = mean + self.alpha_linucb * np.sqrt(variance)

        # Select the best arm (ties resolve to the lowest index).
        arm = int(np.argmax(ucb_scores))
        self.last_arm = arm

        # Convert the flat arm index back to (alpha, beta) grid positions.
        alpha_idx, beta_idx = divmod(arm, len(self.beta_values))
        return self.alpha_values[alpha_idx], self.beta_values[beta_idx]

    def update(self, state_features, selected_arm, reward):
        """Update the statistics of ``selected_arm`` with the observed reward.

        Raises:
            IndexError: if ``selected_arm`` is outside ``[0, arm_count)``.
            ValueError: if the extracted context does not have ``d`` features.
        """
        if not 0 <= selected_arm < self.arm_count:
            raise IndexError(
                f"selected_arm {selected_arm} out of range [0, {self.arm_count})"
            )
        x = self._context(state_features)

        # x is a (d, 1) column, so x @ x.T is the d x d outer product.
        self.A[selected_arm] += x @ x.T
        self.b[selected_arm] += reward * x
Editor is loading...
Leave a Comment