Untitled

 avatar
unknown
python
2 years ago
1.2 kB
6
Indexable
def make_partition(
    data: "pd.DataFrame", feature: str, threshold: float
) -> "tuple[pd.DataFrame, pd.DataFrame]":
    """
    Split the data into 2 branches around a threshold on one feature.

    The rows are first sorted by ``feature`` in ascending order, so both
    returned frames are ordered by that column. The input frame itself is not
    modified.

    args:
    * data: the input data
    * feature: the attribute (column name) to split on
    * threshold: the threshold for splitting the data
    return:
    * left: the rows whose ``feature`` value is less than or equal to
      ``threshold``, re-indexed from 0
    * right: the remaining rows (value greater than ``threshold``, or missing),
      re-indexed from 0
    side effects:
    * prints ``left`` to standard output
    """
    sorted_data = data.sort_values([feature], ascending=True)

    # One vectorized comparison replaces the per-row label lookups. Label
    # lookups break when the index contains duplicate labels, whereas this
    # count only depends on the column values.
    split = int((sorted_data[feature] <= threshold).sum())

    # After an ascending sort (missing values are placed last), the matching
    # rows are exactly the first `split` rows, so positional slicing is enough.
    # `DataFrame.append` was removed in pandas 2.0; `reset_index(drop=True)`
    # gives the same fresh 0..n-1 index that `ignore_index=True` produced.
    left = sorted_data.iloc[:split].reset_index(drop=True)
    right = sorted_data.iloc[split:].reset_index(drop=True)

    print(left)
    return left, right


# [Note] The value of "ans_left" must be saved into the output file.
# Partition the input on 'age' at 61.0 and count the rows in the left branch.
left, right = make_partition(data=input_data, feature='age', threshold=61.0)
ans_left = len(left.index)
print("ans_left = ", ans_left)
Editor is loading...