# Untitled
#
# mail@pastecode.io avatar
# unknown
# plain_text
# 7 months ago
# 6.1 kB
# 3
# Indexable
# Never
# Rough first attempt (junk); a decent solution is further down.
raw = df  # keep a handle on the input frame; `df` is rebound later
steps = ['fall_1', 'fall_2', 'fall_3']
# Every distinct value found in the three preference columns.
courses = set(df[steps].values.ravel('K'))

# Seat counts that differ from the default of 30.
_size_overrides = {
    "Statistical Learning Theory": 60,
    "Высокопроизводительные вычисления": 60,
    "Анализ неструктурированных данных": 1000,
}
from_course_to_size = {}
from_course_to_size_second = {}
for course in courses:
    from_course_to_size[course] = _size_overrides.get(course, 30)
        
# First-attempt allocation: one row per student with up to two assigned
# courses, plus flags marking the first and last admitted student of a course.
ans = pd.DataFrame(columns=["id", "course1", "course2", "best", "worst"])

for course in courses:
    # Applicants: students who listed the course first, plus second choices of
    # students with spring_course_number == 2, sorted by ascending percentile.
    fall_1 = df[df["fall_1"] == course][["id", "percentile"]]
    fall_2 = df[(df["fall_2"] == course) & (df["spring_course_number"] == 2)][["id", "percentile"]]
    fall_1_2 = pd.DataFrame(pd.concat([fall_1, fall_2]).drop_duplicates().sort_values("percentile", ascending=True))

    # The head of the sorted list, up to the course capacity, is admitted.
    entered_people = pd.DataFrame(fall_1_2["id"].head(from_course_to_size[course]))

    start_not_entered = len(entered_people)

    # Everyone after the admitted head of the list.
    not_entered = pd.DataFrame(fall_1_2.iloc[start_not_entered:]["id"])

    print(len(not_entered), "не поступило", from_course_to_size[course], "всего мест", len(entered_people), "поступило", len(fall_1_2), "хотело")

    # Remember how many seats stayed free after this pass.
    if from_course_to_size[course] > len(entered_people):
        from_course_to_size_second[course] = from_course_to_size[course] - len(entered_people)

    # Split rejected and admitted students by whether they already have a row
    # in `ans` from a previously processed course.
    who_is_not_entered_but_was = pd.DataFrame(ans.loc[ans["id"].isin(not_entered["id"])])

    who_is_not_entered_but_not_was = pd.DataFrame(not_entered.loc[~not_entered["id"].isin(who_is_not_entered_but_was["id"])])

    who_is_second_time = pd.DataFrame(ans.loc[ans["id"].isin(entered_people["id"])])

    who_is_first_time = pd.DataFrame(entered_people.loc[~entered_people["id"].isin(who_is_second_time["id"])])

    # Students not yet in the table who got in.
    ans = pd.merge(ans, who_is_first_time, on="id", how="outer")

    ans.loc[ans['id'].isin(who_is_first_time['id']), 'course1'] = course

    ans.loc[ans['id'].isin(who_is_first_time['id']), 'course2'] = "-"

    # Students not yet in the table who did not get in.
    ans = pd.merge(ans, who_is_not_entered_but_not_was, on="id", how="outer")

    ans.loc[ans['id'].isin(who_is_not_entered_but_not_was['id']), 'course1'] = "???"
    ans.loc[ans['id'].isin(not_entered['id']), 'course2'] = "-"

    # Students already in the table who got in: this is their second course.

    ans.loc[ans["id"].isin(who_is_second_time["id"]), "course2"] = course
    ans.loc[ans["id"].isin(who_is_not_entered_but_was["id"]), "course2"] = "???"

    # A course with no applicants admits nobody; indexing an empty frame with
    # iloc would raise IndexError, so only flag best/worst when someone got in.
    if start_not_entered > 0:
        ans.loc[ans["id"] == entered_people["id"].iloc[0], "best"] = True
        ans.loc[ans["id"] == entered_people["id"].iloc[start_not_entered - 1], "worst"] = True

# NOTE: `df` is rebound to the result table here; the input stays in `raw`.
df = ans.copy()
df_without_best = ans.drop(["best", "worst"], axis=1)




# Second solution: start again from an independent copy of the input frame.
df = raw.copy()
steps = ['fall_1', 'fall_2', 'fall_3']
# Every distinct value found in the three preference columns.
courses = set(df[steps].values.ravel('K'))

# Seat counts that differ from the default of 30.
_size_overrides = {
    "Statistical Learning Theory": 60,
    "Высокопроизводительные вычисления": 60,
    "Анализ неструктурированных данных": 1000,
}
from_course_to_size = {}
for course in courses:
    from_course_to_size[course] = _size_overrides.get(course, 30)
        
# Result table: one row per input row, with placeholder assignments.
# "???" marks a slot still waiting for a course, "-" a slot that is not needed.
ans = df[["id", "spring_course_number"]].copy()
ans = ans.assign(course1="???", course2="-")
# Rows with spring_course_number == 2 also have a second slot to fill.
takes_two_courses = ans["spring_course_number"] == 2
ans.loc[takes_two_courses, "course2"] = "???"
# Greedy allocation in three rounds over the preference columns.
# In each round every course with free seats admits the head of its
# applicant pool (sorted by ascending percentile) up to the remaining capacity.
# Students whose slots are all filled are dropped from `df`, so they cannot
# appear in any later pool.
for i in range(1, 4):
    for course in courses:
        if from_course_to_size[course] <= 0:
            # No seats left for this course.
            continue

        two_courses = df["spring_course_number"] == 2
        if i == 1:
            # First choices, plus second choices of two-course students.
            wanted = (df["fall_1"] == course) | ((df["fall_2"] == course) & two_courses)
        elif i == 2:
            # Second choices of one-course students, plus third choices of
            # two-course students who did not list the course higher up.
            wanted = ((df["fall_2"] == course) & (df["spring_course_number"] == 1)) | (
                (df["fall_1"] != course)
                & (df["fall_2"] != course)
                & (df["fall_3"] == course)
                & two_courses
            )
        else:
            # Third choices of everyone still left in `df`.
            wanted = df["fall_3"] == course

        # Built the same way in every round (the original skipped the column
        # selection and de-duplication in round 3).
        pool = df.loc[wanted, ["id", "percentile"]].drop_duplicates().sort_values("percentile", ascending=True)

        entered_people = pd.DataFrame(pool["id"].head(from_course_to_size[course]))
        from_course_to_size[course] -= len(entered_people)

        # Admitted students whose first slot is still open get the course there.
        # Matching is done on the "id" column; DataFrame.isin with a Series
        # would match on index labels instead.
        first_course = pd.DataFrame(ans[ans["id"].isin(entered_people["id"]) & (ans["course1"] == "???")]["id"])
        ans.loc[ans["id"].isin(first_course["id"]), "course1"] = course

        # The remaining admitted students already have course1, so this course
        # goes into their second slot.
        entered_people = pd.DataFrame(entered_people[~entered_people["id"].isin(first_course["id"])])
        ans.loc[ans["id"].isin(entered_people["id"]), "course2"] = course

        # Drop one-course students who now have their course ...
        mask = df["id"].isin(ans[(ans["spring_course_number"] == 1) & (ans["course1"] != "???")]["id"])
        df = pd.DataFrame(df[~mask])
        # ... and two-course students who now have both.
        mask = df["id"].isin(ans[(ans["spring_course_number"] == 2) & (ans["course1"] != "???") & (ans["course2"] != "???")]["id"])
        df = pd.DataFrame(df[~mask])

    if i == 2:
        # Two-course students take no part in round 3; any slot still open
        # keeps its "???" placeholder.
        mask = df["id"].isin(df[df["spring_course_number"] == 2]["id"])
        df = pd.DataFrame(df[~mask])