linucb
unknown
python
8 months ago
2.0 kB
10
Indexable
class ControllerLayer:
    """Pick an (alpha, beta) weight pair with a disjoint LinUCB bandit.

    Every combination of ``alpha_values`` x ``beta_values`` is one arm.
    Each arm keeps its own ridge-regression statistics: ``A`` (d x d,
    initialised to the identity) and ``b`` (d x 1, initialised to zero).
    """

    def __init__(self):
        # Candidate weight grids. np.linspace's third argument is the NUMBER
        # of samples, not a step size; the previous value 0.5 raised
        # TypeError.
        # NOTE(review): 5 points (0.2, 0.4, ..., 1.0) is assumed to be the
        # intended grid -- confirm.
        self.alpha_values = np.linspace(0.2, 1.0, 5)
        self.beta_values = np.linspace(0.2, 1.0, 5)
        self.arm_count = len(self.alpha_values) * len(self.beta_values)

        # LinUCB parameters.
        self.alpha_linucb = 0.5  # Exploration parameter
        self.d = 64  # Feature dimension; must match the extractor's output_dim

        # Per-arm statistics: A accumulates x x^T, b accumulates reward * x.
        self.A = [np.identity(self.d) for _ in range(self.arm_count)]
        self.b = [np.zeros((self.d, 1)) for _ in range(self.arm_count)]

        # Index of the arm chosen by the most recent get_weights() call, so
        # callers have something to pass to update(). None until first use.
        self.last_arm = None

        # Feature extractor mapping raw state features to a d-dim context.
        # NOTE(review): dropout=0.3 is configured; if the extractor applies
        # dropout at call time, get_weights() and update() will see different
        # contexts for the same state -- confirm it runs in inference mode.
        self.feature_extractor = MLPFeatureExtractor(
            input_dim=128,
            hidden_dims=[128, 64],
            output_dim=self.d,
            dropout=0.3,
        )

    def _context_vector(self, state_features):
        """Run the feature extractor and return a (d, 1) float column vector."""
        features = self.feature_extractor(state_features)
        # Tensor-like outputs (anything exposing detach(), e.g. a torch
        # tensor) are converted to NumPy before doing linear algebra.
        if hasattr(features, "detach"):
            features = features.detach().cpu().numpy()
        column = np.asarray(features, dtype=float).reshape(-1, 1)
        if column.shape[0] != self.d:
            raise ValueError(
                f"expected {self.d} context features, got {column.shape[0]}"
            )
        return column

    def get_weights(self, state_features):
        """Return the (alpha, beta) pair of the arm with the highest UCB score.

        The chosen arm index is also stored in ``self.last_arm``.

        Raises:
            ValueError: if the extracted context does not have ``self.d``
                elements.
        """
        x = self._context_vector(state_features).ravel()

        ucb_scores = []
        for arm in range(self.arm_count):
            # A is symmetric, so theta^T x = b^T A^{-1} x. A single solve
            # therefore yields both the mean estimate and the variance term
            # without forming the explicit inverse.
            a_inv_x = np.linalg.solve(self.A[arm], x)
            mean = self.b[arm].ravel() @ a_inv_x
            variance = x @ a_inv_x
            # Clamp: rounding can push a near-zero variance slightly negative.
            bonus = self.alpha_linucb * np.sqrt(max(variance, 0.0))
            ucb_scores.append(mean + bonus)

        # Select the best arm (ties resolve to the lowest index).
        arm = int(np.argmax(ucb_scores))
        self.last_arm = arm

        # Convert the flat arm index back to (alpha, beta) grid positions.
        alpha_idx, beta_idx = divmod(arm, len(self.beta_values))
        return self.alpha_values[alpha_idx], self.beta_values[beta_idx]

    def update(self, state_features, selected_arm, reward):
        """Fold an observed reward into the statistics of ``selected_arm``.

        Args:
            state_features: raw state passed to the feature extractor.
            selected_arm: flat arm index that was played (``self.last_arm``
                holds the index chosen by the latest ``get_weights`` call).
            reward: scalar reward observed for that arm.

        Raises:
            ValueError: if the extracted context does not have ``self.d``
                elements.
        """
        x = self._context_vector(state_features)
        # Explicit outer product on a (d, 1) column: with a 1-D vector,
        # x.dot(x.T) would collapse to a scalar and corrupt A.
        self.A[selected_arm] += x @ x.T
        self.b[selected_arm] += reward * x
Editor is loading...
Leave a Comment