Untitled
unknown
python
2 years ago
1.2 kB
6
Indexable
def make_partition(data, feature, threshold):
    """
    Split the data into 2 branches around a threshold on one feature.

    The rows are first sorted by ``feature`` in ascending order, so both
    returned frames are ordered by that column and re-indexed from 0
    (the original index labels are discarded).

    args:
    * data(type: DataFrame): the input data
    * feature(type: string): the attribute(column name)
    * threshold(type: float): the threshold for splitting the data
    return:
    * left(type: DataFrame): the divided data that matches(less than or
      equal to) the assigned feature's threshold
    * right(type: DataFrame): the divided data that doesn't match the
      assigned feature's threshold (rows whose feature value is missing
      end up here, because NaN <= threshold is False)
    raises:
    * KeyError: if ``feature`` is not a column of ``data``
    """
    ordered = data.sort_values([feature], ascending=True)

    # One vectorised comparison replaces the per-row label lookup. It does
    # not depend on index labels being unique, and it avoids
    # DataFrame.append, which no longer exists in pandas >= 2.0.
    matches = ordered[feature] <= threshold

    # After an ascending sort (NaN last) the matching rows form a prefix of
    # `ordered`, so masking yields the same rows, in the same order, as
    # slicing at the split point.
    left = ordered[matches].reset_index(drop=True)
    right = ordered[~matches].reset_index(drop=True)

    # Kept from the original: echoes the left partition to stdout.
    print(left)
    return left, right


# [Note] You have to save the value of "ans_left" into the output file
# NOTE(review): `input_data` is not defined in this snippet; presumably it is
# a DataFrame with an 'age' column loaded earlier -- confirm against the caller.
left, right = make_partition(input_data, 'age', 61.0)
ans_left = left.shape[0]  # number of rows with age <= 61.0
print("ans_left = ", ans_left)
Editor is loading...