# NOTE: paste-site page metadata that preceded this snippet was removed; it was not part of the program.
def make_partition(data, feature, threshold):
    """
    Split the data into two branches on a single feature threshold.

    The rows are first sorted by `feature` in ascending order, so both
    returned frames are ordered by that column. Each returned frame gets a
    fresh 0..n-1 index; the input frame itself is not modified.

    args:
    * data(type: DataFrame): the input data
    * feature(type: string): the attribute(column name)
    * threshold(type: float): the threshold for splitting the data
    return:
    * left(type: DataFrame): the rows whose `feature` value is less than or equal to the threshold
    * right(type: DataFrame): all remaining rows (value greater than the threshold, or missing/NaN)
    """
    sorted_data = data.sort_values([feature], ascending=True)

    # One vectorised comparison replaces the per-row label lookup. It works
    # positionally, so duplicated index labels are fine, and NaN compares as
    # False, which sends missing values to the right branch.
    matches = sorted_data[feature] <= threshold

    # reset_index(drop=True) gives the same fresh index that the removed
    # DataFrame.append(..., ignore_index=True) used to produce.
    left = sorted_data[matches].reset_index(drop=True)
    right = sorted_data[~matches].reset_index(drop=True)
    return left, right
# [Note] You have to save the value of "ans_left" into the output file
# Partition the dataset on the 'age' column at 61.0; `input_data` is expected
# to be defined earlier in the file.
left, right = make_partition(input_data, 'age', 61.0)
# ans_left is the number of rows that fell into the left branch.
ans_left = left.shape[0]
print("ans_left = ", ans_left)