Untitled
unknown
plain_text
2 years ago
6.1 kB
7
Indexable
# Allocation of students to fall courses.
#
# Expects ``pd`` (pandas) and the applications table ``df`` to be defined
# already.  Columns read from ``df``: ``id``, ``percentile``, ``fall_1``,
# ``fall_2``, ``fall_3`` (course choices in priority order) and
# ``spring_course_number`` (the code treats 2 as "takes two courses" and 1 as
# "takes one course").  Applicants are ranked by ascending ``percentile``.
#
# The script has two parts:
#   1. a rough first attempt (the author marked it as junk), kept because it
#      prints per-course statistics and defines ``df_without_best``;
#   2. the proper round-based solution, whose result is left in ``ans``.

_DEFAULT_SEATS = 30
# Courses whose seat count differs from the default.
# NOTE(review): 1000 presumably means "effectively unlimited" -- confirm.
_SEAT_OVERRIDES = {
    "Statistical Learning Theory": 60,
    "Высокопроизводительные вычисления": 60,
    "Анализ неструктурированных данных": 1000,
}


def _course_capacities(course_names):
    """Return a fresh ``{course: free seats}`` mapping for *course_names*."""
    return {
        name: _SEAT_OVERRIDES.get(name, _DEFAULT_SEATS) for name in course_names
    }


# --------------------------------------------------------------------------
# Part 1: rough first attempt (first-wave applications only).
# --------------------------------------------------------------------------

# ``df`` is only ever rebound below, never modified in place, so ``raw``
# keeps referring to the original table.
raw = df
steps = ["fall_1", "fall_2", "fall_3"]
courses = set(df[steps].values.ravel("K"))
from_course_to_size = _course_capacities(courses)
# Seats left over after the first wave (collected but not used further here).
from_course_to_size_second = {}

ans = pd.DataFrame(columns=["id", "course1", "course2", "best", "worst"])
for course in courses:
    # First wave: everyone's first choice plus the second choice of students
    # who take two courses.
    fall_1 = df.loc[df["fall_1"] == course, ["id", "percentile"]]
    fall_2 = df.loc[
        (df["fall_2"] == course) & (df["spring_course_number"] == 2),
        ["id", "percentile"],
    ]
    fall_1_2 = (
        pd.concat([fall_1, fall_2])
        .drop_duplicates()
        .sort_values("percentile", ascending=True)
    )

    entered_people = fall_1_2[["id"]].head(from_course_to_size[course])
    start_not_entered = len(entered_people)
    not_entered = fall_1_2[["id"]].iloc[start_not_entered:]
    print(len(not_entered), "не поступило", from_course_to_size[course], "всего мест", len(entered_people), "поступило", len(fall_1_2), "хотело")

    if from_course_to_size[course] > len(entered_people):
        from_course_to_size_second[course] = (
            from_course_to_size[course] - len(entered_people)
        )

    # Split both groups by whether the student already has a row in ``ans``.
    # All four frames must be computed before ``ans`` is extended below.
    who_is_not_entered_but_was = ans.loc[ans["id"].isin(not_entered["id"])]
    who_is_not_entered_but_not_was = not_entered.loc[
        ~not_entered["id"].isin(who_is_not_entered_but_was["id"])
    ]
    who_is_second_time = ans.loc[ans["id"].isin(entered_people["id"])]
    who_is_first_time = entered_people.loc[
        ~entered_people["id"].isin(who_is_second_time["id"])
    ]

    # Not in the table yet and admitted.
    ans = pd.merge(ans, who_is_first_time, on="id", how="outer")
    ans.loc[ans["id"].isin(who_is_first_time["id"]), "course1"] = course
    ans.loc[ans["id"].isin(who_is_first_time["id"]), "course2"] = "-"

    # Not in the table yet and not admitted.
    ans = pd.merge(ans, who_is_not_entered_but_not_was, on="id", how="outer")
    ans.loc[ans["id"].isin(who_is_not_entered_but_not_was["id"]), "course1"] = "???"
    ans.loc[ans["id"].isin(not_entered["id"]), "course2"] = "-"

    # Already in the table and admitted: this is their second course.
    ans.loc[ans["id"].isin(who_is_second_time["id"]), "course2"] = course
    ans.loc[ans["id"].isin(who_is_not_entered_but_was["id"]), "course2"] = "???"

    # Flag the top- and bottom-ranked admitted students.  A course nobody
    # applied to in the first wave (e.g. one listed only as ``fall_3``) has no
    # admitted students, and indexing the empty frame would raise IndexError.
    if not entered_people.empty:
        ans.loc[ans["id"] == entered_people["id"].iloc[0], "best"] = True
        ans.loc[ans["id"] == entered_people["id"].iloc[-1], "worst"] = True

df_without_best = ans.drop(columns=["best", "worst"])

# --------------------------------------------------------------------------
# Part 2: proper solution -- three rounds over the remaining applicants.
# --------------------------------------------------------------------------

df = raw.copy()
steps = ["fall_1", "fall_2", "fall_3"]
courses = set(df[steps].values.ravel("K"))
from_course_to_size = _course_capacities(courses)

# One row per application row; "???" marks a slot still to be filled and "-"
# a slot the student does not have (single-course students have no course2).
ans = df[["id", "spring_course_number"]].copy()
ans["course1"] = "???"
ans["course2"] = "-"
ans.loc[ans["spring_course_number"] == 2, "course2"] = "???"

for i in range(1, 4):
    for course in courses:
        if from_course_to_size[course] <= 0:
            continue

        if i == 1:
            # First choice of everybody and, for students taking two
            # courses, their second choice as well.
            wanted = (df["fall_1"] == course) | (
                (df["fall_2"] == course) & (df["spring_course_number"] == 2)
            )
            pool = df.loc[wanted, ["id", "percentile"]].drop_duplicates()
        elif i == 2:
            # Second choice of single-course students, and the third choice
            # of two-course students who did not already list this course.
            wanted = (
                (df["fall_2"] == course) & (df["spring_course_number"] == 1)
            ) | (
                (df["fall_1"] != course)
                & (df["fall_2"] != course)
                & (df["fall_3"] == course)
                & (df["spring_course_number"] == 2)
            )
            pool = df.loc[wanted, ["id", "percentile"]].drop_duplicates()
        else:
            # Third choice of whoever is still unplaced.
            pool = df[df["fall_3"] == course]
        pool = pool.sort_values("percentile", ascending=True)

        entered_people = pool[["id"]].head(from_course_to_size[course])
        from_course_to_size[course] -= len(entered_people)

        # Admitted students whose first slot is still open get this course as
        # course1; the remaining admitted students get it as course2.
        # ``Series.isin`` matches ids by value; the earlier
        # ``DataFrame.isin(series)`` form matched only where the index labels
        # happened to line up as well.
        first_course = ans.loc[
            ans["id"].isin(entered_people["id"]) & (ans["course1"] == "???"),
            ["id"],
        ]
        ans.loc[ans["id"].isin(first_course["id"]), "course1"] = course

        mask = entered_people["id"].isin(first_course["id"])
        entered_people = entered_people[~mask]
        ans.loc[ans["id"].isin(entered_people["id"]), "course2"] = course

    # End of round: drop fully placed students from the applicant table.
    mask = df["id"].isin(
        ans.loc[
            (ans["spring_course_number"] == 1) & (ans["course1"] != "???"),
            "id",
        ]
    )
    df = df[~mask]
    mask = df["id"].isin(
        ans.loc[
            (ans["spring_course_number"] == 2)
            & (ans["course1"] != "???")
            & (ans["course2"] != "???"),
            "id",
        ]
    )
    df = df[~mask]
    if i == 2:
        # Two-course students take no part in the third round.
        mask = df["id"].isin(df.loc[df["spring_course_number"] == 2, "id"])
        df = df[~mask]
Editor is loading...