Untitled
unknown
python
2 years ago
2.8 kB
28
Indexable
# ---- Problem dimensions ----------------------------------------------------
# Number of request groups to schedule.
N = 15
# Number of GPUs.
GPUs = 5
# Number of slots per GPU.
SLOTs = 3
# Number of models being served.
# NOTE(review): the original comment assumes labels 0..MODELs-1, but the
# sample `models` list below uses labels 1 and 2 -- confirm the convention.
MODELs = 2
# Time required to swap two models.
MODEL_SWAP_TIME = 5

# ---- Sample input ----------------------------------------------------------
# NOTE(review): each list holds 30 entries while N is 15; the constraints
# only index entries 0..N-1, so the trailing half (including every model
# label 2) is never read.
# Group completion times.
groups = [2] * 30
# Group SLOs.
slos = [100] * 30
# Model type per group.
models = [1] * 15 + [2] * 15
# Model initialization
# NOTE(review): Model, GRB and quicksum are not imported in the visible part
# of this file; presumably they come from gurobipy -- confirm the import.
model = Model("scheduling")

# x[g, i, slot] == 1 iff request group i is placed in slot `slot` of GPU g.
# The last index ranges over SLOTs (not N): every constraint only ever
# addresses slots 0..SLOTs-1, so a larger range would just add free binaries.
x = model.addVars(GPUs, N, SLOTs, vtype=GRB.BINARY, name='x')

# Per-(GPU, slot) quantities derived from the assignment.
# Cumulative completion time of the slot.
completion_slot = model.addVars(GPUs, SLOTs, vtype=GRB.INTEGER, name='ct')
# Model label of the group placed in the slot.
model_slot = model.addVars(GPUs, SLOTs, vtype=GRB.INTEGER, name='model')
# 0/1 indicator that a model swap precedes the slot.
transition_slot = model.addVars(GPUs, SLOTs, vtype=GRB.BINARY, name='trans')
# SLO of the group placed in the slot.
slo_slot = model.addVars(GPUs, SLOTs, vtype=GRB.INTEGER, name='slo')
# Finish time minus SLO; negative when the SLO is met, hence the explicit
# negative lower bound (Gurobi variables default to lb=0).
penalty_slot = model.addVars(GPUs, SLOTs, vtype=GRB.INTEGER, name='penalty', lb=-100000)
# Group assignment to GPU slot: every request group occupies exactly one
# (GPU, slot) pair.
for grp in range(N):
    placements = quicksum(
        x[gpu, grp, slot]
        for gpu in range(GPUs)
        for slot in range(SLOTs)
    )
    model.addConstr(placements == 1)

# GPU slot assignment to group: every (GPU, slot) pair hosts exactly one
# request group. Together with the constraint above this is a one-to-one
# matching, so the model is only feasible when N == GPUs * SLOTs.
for gpu in range(GPUs):
    for slot in range(SLOTs):
        occupants = quicksum(x[gpu, grp, slot] for grp in range(N))
        model.addConstr(occupants == 1)
# Calculating model type and SLO for all GPU slots.
# Because x selects the group placed in the slot, each weighted sum below
# picks out that group's model label / SLO value.
for gpu in range(GPUs):
    for slot in range(SLOTs):
        placed_model = quicksum(models[grp] * x[gpu, grp, slot] for grp in range(N))
        placed_slo = quicksum(slos[grp] * x[gpu, grp, slot] for grp in range(N))
        model.addConstr(model_slot[gpu, slot] == placed_model)
        model.addConstr(slo_slot[gpu, slot] == placed_slo)
# Initializing model swap time for first GPU slot to 0: the first slot of a
# GPU has no predecessor, so it never pays for a swap.
for g in range(GPUs):
    model.addConstr(transition_slot[g, 0] == 0)

# Calculating model swaps based on adjacent GPU slots.
# transition_slot[g, slot] must be 1 exactly when the model served in `slot`
# differs from the one served in `slot - 1`, and 0 otherwise.
#
# A pair of big-M bounds on the label difference cannot express
# "difference != 0" without an extra disjunction variable, so the condition
# is linearized per model label instead. For each label, `here` / `before`
# are 0/1 expressions telling whether the slot / its predecessor serves that
# label (each slot hosts exactly one group):
#   * label served here but not before  ->  transition >= 1
#   * label served in both slots        ->  transition <= 0
# With one label per slot these two families pin the indicator to the right
# value, independent of how labels are numbered.
model_labels = sorted(set(models[:N]))
for g in range(GPUs):
    for slot in range(1, SLOTs):
        for label in model_labels:
            here = quicksum(
                x[g, i, slot] for i in range(N) if models[i] == label
            )
            before = quicksum(
                x[g, i, slot - 1] for i in range(N) if models[i] == label
            )
            model.addConstr(transition_slot[g, slot] >= here - before)
            model.addConstr(transition_slot[g, slot] <= 2 - here - before)
# Estimating cumulative completion time per GPU slot: the sum of the
# completion times of every group placed in this slot or an earlier slot of
# the same GPU.
for gpu in range(GPUs):
    for slot in range(SLOTs):
        elapsed = quicksum(
            groups[grp] * x[gpu, grp, earlier]
            for grp in range(N)
            for earlier in range(slot + 1)
        )
        model.addConstr(completion_slot[gpu, slot] == elapsed)

# Estimating penalty for violating SLOs: finish time (including the swap
# into this slot, if any) minus the SLO of the group placed in the slot.
# NOTE(review): only the swap into the current slot is charged; swaps that
# happened in earlier slots of the same GPU are not added to the cumulative
# time -- confirm this is intended.
for gpu in range(GPUs):
    for slot in range(SLOTs):
        swap_delay = MODEL_SWAP_TIME * transition_slot[gpu, slot]
        finish_time = completion_slot[gpu, slot] + swap_delay
        model.addConstr(penalty_slot[gpu, slot] == finish_time - slo_slot[gpu, slot])

# Constraining no SLO violation: a positive penalty would mean finishing
# after the SLO, so every slot's penalty is capped at zero.
for gpu in range(GPUs):
    for slot in range(SLOTs):
        model.addConstr(penalty_slot[gpu, slot] <= 0)
# Minimize total penalty. Each penalty is (finish time - SLO) and is
# constrained to be <= 0, so minimizing the sum maximizes the total slack
# left before the SLOs.
model.setObjective(
    quicksum(penalty_slot[g, slot] for g in range(GPUs) for slot in range(SLOTs)),
    GRB.MINIMIZE,
)
model.optimize()
Leave a Comment