Untitled
unknown
plain_text
2 years ago
6.1 kB
21
Indexable
# Rough first attempt (junk); a decent solution is further below.
# `raw` is a second reference to the input frame, not a copy.
raw = df
steps = ["fall_1", "fall_2", "fall_3"]
# Every distinct value that occurs in the three `fall_*` columns.
courses = set(df[steps].values.ravel("K"))
# Seat limit per course: two named courses get 60 seats, one gets 1000,
# every other course gets 30.
from_course_to_size = {
    course: (
        60
        if course in ("Statistical Learning Theory", "Высокопроизводительные вычисления")
        else 1000
        if course == "Анализ неструктурированных данных"
        else 30
    )
    for course in courses
}
# Filled later with the seats that stay free after the first pass.
from_course_to_size_second = {}
# First allocation pass. For every course, the applicants are the rows that
# name it in `fall_1`, plus the rows that name it in `fall_2` and have
# spring_course_number == 2. They are ranked by ascending percentile and the
# first `from_course_to_size[course]` of them are admitted.
# `ans` accumulates one row per id with the columns course1/course2 and the
# best/worst flags of the admitted list.
ans = pd.DataFrame(columns=["id", "course1", "course2", "best", "worst"])
for course in courses:
    fall_1 = df[df["fall_1"] == course][["id", "percentile"]]
    fall_2 = df[(df["fall_2"] == course) & (df["spring_course_number"] == 2)][["id", "percentile"]]
    fall_1_2 = pd.concat([fall_1, fall_2]).drop_duplicates().sort_values("percentile", ascending=True)

    # Top of the ranking gets in; everybody past the cut-off does not.
    entered_people = fall_1_2[["id"]].head(from_course_to_size[course])
    start_not_entered = len(entered_people)
    not_entered = fall_1_2[["id"]].iloc[start_not_entered:]
    print(len(not_entered), "не поступило", from_course_to_size[course], "всего мест", len(entered_people), "поступило", len(fall_1_2), "хотело")

    # Remember how many seats are still free for this course.
    if from_course_to_size[course] > len(entered_people):
        from_course_to_size_second[course] = from_course_to_size[course] - len(entered_people)

    # Split both groups by whether the id is already present in `ans`.
    who_is_not_entered_but_was = ans.loc[ans["id"].isin(not_entered["id"])]
    who_is_not_entered_but_not_was = not_entered.loc[~not_entered["id"].isin(who_is_not_entered_but_was["id"])]
    who_is_second_time = ans.loc[ans["id"].isin(entered_people["id"])]
    who_is_first_time = entered_people.loc[~entered_people["id"].isin(who_is_second_time["id"])]

    # Not in the table yet and admitted.
    ans = pd.merge(ans, who_is_first_time, on="id", how="outer")
    ans.loc[ans["id"].isin(who_is_first_time["id"]), "course1"] = course
    ans.loc[ans["id"].isin(who_is_first_time["id"]), "course2"] = "-"

    # Not in the table yet and not admitted.
    ans = pd.merge(ans, who_is_not_entered_but_not_was, on="id", how="outer")
    ans.loc[ans["id"].isin(who_is_not_entered_but_not_was["id"]), "course1"] = "???"
    ans.loc[ans["id"].isin(not_entered["id"]), "course2"] = "-"

    # Already in the table and admitted: this course becomes their second one.
    ans.loc[ans["id"].isin(who_is_second_time["id"]), "course2"] = course
    ans.loc[ans["id"].isin(who_is_not_entered_but_was["id"]), "course2"] = "???"

    # Flag the first and the last admitted id. A course nobody applied to has
    # an empty admitted list; positional indexing into it would raise
    # IndexError, so the flags are skipped in that case.
    if start_not_entered > 0:
        ans.loc[ans["id"] == entered_people["id"].iloc[0], "best"] = True
        ans.loc[ans["id"] == entered_people["id"].iloc[-1], "worst"] = True

# `df` is rebound to the result table; the input frame stays reachable
# through whatever other references to it exist.
df = ans.copy()
df_without_best = ans.drop(columns=["best", "worst"])
# Start over from an independent copy of the input frame.
df = raw.copy()
steps = ["fall_1", "fall_2", "fall_3"]
# Every distinct value that occurs in the three `fall_*` columns.
courses = set(df[steps].values.ravel("K"))
# Seat limit per course; anything not listed in the table gets 30 seats.
from_course_to_size = {
    course: {
        "Statistical Learning Theory": 60,
        "Высокопроизводительные вычисления": 60,
        "Анализ неструктурированных данных": 1000,
    }.get(course, 30)
    for course in courses
}
# Result table: the id and spring_course_number columns of `df` (same index),
# with both course slots seeded with placeholders.
ans = df[["id", "spring_course_number"]].copy()
ans = ans.assign(course1="???", course2="-")
# Rows with spring_course_number == 2 have a second slot that is still open.
ans.loc[ans["spring_course_number"] == 2, "course2"] = "???"
# Seat allocation in three rounds. `ans` collects the result; a row is dropped
# from `df` once all of its slots in `ans` are filled, so it no longer shows
# up in later pools. `from_course_to_size` is decremented as seats are taken.
#   round 1: `fall_1` for everybody, plus `fall_2` for rows with
#            spring_course_number == 2;
#   round 2: `fall_2` for rows with spring_course_number == 1, plus `fall_3`
#            for rows with spring_course_number == 2 when it differs from
#            their `fall_1` and `fall_2`;
#   round 3: `fall_3` for whatever is still left in `df`.
# NOTE(review): the pasted source lost its indentation, so the nesting below
# is reconstructed. In particular the `if i == 2` clean-up is assumed to run
# once per round, after the course loop — confirm against the notebook.
for i in range(1, 4):
    for course in courses:
        # No seats left in this course.
        if from_course_to_size[course] <= 0:
            continue

        # Look at course1 and, if the person takes two courses, at course2 too.
        if i == 1:
            pool = df[
                (df["fall_1"] == course)
                | ((df["fall_2"] == course) & (df["spring_course_number"] == 2))
            ][["id", "percentile"]]
            pool = pool.drop_duplicates().sort_values("percentile", ascending=True)
        elif i == 2:
            pool = df[
                ((df["fall_2"] == course) & (df["spring_course_number"] == 1))
                | (
                    (df["fall_1"] != course)
                    & (df["fall_2"] != course)
                    & (df["fall_3"] == course)
                    & (df["spring_course_number"] == 2)
                )
            ][["id", "percentile"]]
            pool = pool.drop_duplicates().sort_values("percentile", ascending=True)
        elif i == 3:
            pool = df[df["fall_3"] == course].sort_values("percentile", ascending=True)

        # Lowest percentiles first; admit as many as there are seats left.
        entered_people = pool[["id"]].head(from_course_to_size[course])
        from_course_to_size[course] -= len(entered_people)

        # Admitted ids whose first slot is still open take this course as course1.
        # Membership is tested on the id values themselves.
        first_course = ans[ans["id"].isin(entered_people["id"]) & (ans["course1"] == "???")][["id"]]
        ans.loc[ans["id"].isin(first_course["id"]), "course1"] = course

        # Everybody else who was admitted already has course1, so this is course2.
        mask = entered_people["id"].isin(first_course["id"])
        entered_people = entered_people[~mask]
        ans.loc[ans["id"].isin(entered_people["id"]), "course2"] = course

        # Drop rows that are fully placed: one-course rows with course1 set ...
        mask = df["id"].isin(ans[(ans["spring_course_number"] == 1) & (ans["course1"] != "???")]["id"])
        df = df[~mask]
        # ... and two-course rows with both slots set.
        mask = df["id"].isin(
            ans[
                (ans["spring_course_number"] == 2)
                & (ans["course1"] != "???")
                & (ans["course2"] != "???")
            ]["id"]
        )
        df = df[~mask]

    # After round 2 every `fall_*` column of the two-course rows has been
    # considered, so they are removed before round 3.
    if i == 2:
        mask = df["id"].isin(df[df["spring_course_number"] == 2]["id"])
        df = df[~mask]
Editor is loading...