Untitled
unknown
python
a year ago
2.8 kB
18
Indexable
# Gurobi MIP that places N request groups onto GPUs x SLOTs execution slots
# (exactly one group per slot, exactly one slot per group) such that no slot
# finishes after the SLO of the group it runs, while minimising the summed
# (completion time + swap time - SLO) over all slots.

# Model, GRB and quicksum are gurobipy names; they are imported here because
# the script uses them but contains no import of its own.
from gurobipy import GRB, Model, quicksum

N = 15  # number of request groups
GPUs = 5  # number of GPUs
SLOTs = 3  # number of slots per GPU
MODELs = 2  # number of models being served (assume serially labeled from 0 to MODELs-1)
MODEL_SWAP_TIME = 5  # time required to swap two models

# Sample input. Only the first N entries of each list are read below.
groups = [2] * 30  # group completion times
slos = [100] * 30  # group SLOs
models = [1] * 15 + [2] * 15  # model type per group

# The assignment constraints put exactly one group in every slot and every
# group in exactly one slot, which is only satisfiable when the counts match.
if N != GPUs * SLOTs:
    raise ValueError(
        "N must equal GPUs * SLOTs (got N=%d, GPUs*SLOTs=%d)" % (N, GPUs * SLOTs)
    )
if min(len(groups), len(slos), len(models)) < N:
    raise ValueError("groups, slos and models must each hold at least N entries")

# Largest possible label difference between two adjacent slots; used as the
# big-M that links a label change to the swap indicator. Kept >= 1 so the
# link stays well-formed even when every group uses the same model.
MODEL_DIFF_BOUND = max(max(models[:N]) - min(models[:N]), 1)

# Model initialization
model = Model("scheduling")

# x[g, i, slot] == 1 iff group i runs in slot `slot` of GPU g.
x = model.addVars(GPUs, N, SLOTs, vtype=GRB.BINARY, name='x')

# Per-(GPU, slot) quantities derived from the assignment.
completion_slot = model.addVars(GPUs, SLOTs, vtype=GRB.INTEGER, name='ct')
model_slot = model.addVars(GPUs, SLOTs, vtype=GRB.INTEGER, name='model')
# 1 iff the slot runs a different model than the previous slot on that GPU.
transition_slot = model.addVars(GPUs, SLOTs, vtype=GRB.BINARY, name='trans')
slo_slot = model.addVars(GPUs, SLOTs, vtype=GRB.INTEGER, name='slo')
# Penalty is completion + swap - SLO, so it is negative whenever the SLO is
# met; the default lower bound of 0 therefore has to be relaxed.
penalty_slot = model.addVars(GPUs, SLOTs, vtype=GRB.INTEGER, name='penalty', lb=-100000)

# Group assignment to GPU slot: every group occupies exactly one slot.
for i in range(N):
    model.addConstr(
        quicksum(x[g, i, slot] for g in range(GPUs) for slot in range(SLOTs)) == 1
    )

for g in range(GPUs):
    for slot in range(SLOTs):
        # GPU slot assignment to group: every slot holds exactly one group.
        model.addConstr(quicksum(x[g, i, slot] for i in range(N)) == 1)

        # Model type and SLO of the group placed in this slot.
        model.addConstr(
            model_slot[g, slot] == quicksum(models[i] * x[g, i, slot] for i in range(N))
        )
        model.addConstr(
            slo_slot[g, slot] == quicksum(slos[i] * x[g, i, slot] for i in range(N))
        )

        # Cumulative completion time: run time of every group placed in
        # slots 0..slot of this GPU.
        model.addConstr(
            completion_slot[g, slot]
            == quicksum(groups[i] * x[g, i, j] for i in range(N) for j in range(slot + 1))
        )

# The first slot of every GPU never pays a swap.
for g in range(GPUs):
    model.addConstr(transition_slot[g, 0] == 0)

# Swap indicator for the remaining slots: |label difference| <= M * trans.
# A non-zero difference (at least 1 in magnitude) forces trans to 1; when the
# labels are equal trans is free, and the minimised objective drives it to 0.
for g in range(GPUs):
    for slot in range(1, SLOTs):
        swap_bound = MODEL_DIFF_BOUND * transition_slot[g, slot]
        model.addConstr(model_slot[g, slot] - model_slot[g, slot - 1] <= swap_bound)
        model.addConstr(model_slot[g, slot - 1] - model_slot[g, slot] <= swap_bound)

# Penalty for violating SLOs, and the hard requirement that none is violated.
# NOTE(review): only the swap entering this slot is charged; swaps of earlier
# slots on the same GPU are not added to the completion time -- confirm that
# this is the intended timing model.
for g in range(GPUs):
    for slot in range(SLOTs):
        model.addConstr(
            penalty_slot[g, slot]
            == completion_slot[g, slot]
            + MODEL_SWAP_TIME * transition_slot[g, slot]
            - slo_slot[g, slot]
        )
        model.addConstr(penalty_slot[g, slot] <= 0)

# Minimize total penalty
model.setObjective(
    quicksum(penalty_slot[g, slot] for g in range(GPUs) for slot in range(SLOTs)),
    GRB.MINIMIZE,
)
model.optimize()
Editor is loading...
Leave a Comment